mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-14 22:07:05 +01:00
Compare commits
605 Commits
ckelly-dec
...
v0.6.0
Author | SHA1 | Date | |
---|---|---|---|
c363bdd784 | |||
604f0ea2f6 | |||
42c912f608 | |||
33dc1f51b5 | |||
9576f0903d | |||
34cf702b24 | |||
8a5e3a917c | |||
65bcf281d0 | |||
cd0be8cb24 | |||
3d2a22a14d | |||
a26adfb090 | |||
f6e1a5b348 | |||
c5a025d421 | |||
50d277d8d9 | |||
343f3e829f | |||
f85b35314d | |||
3dc2e05d6e | |||
0cff8754d1 | |||
afc8d3e524 | |||
692b44dac1 | |||
96ba42a297 | |||
7df940dc3e | |||
f7b60004f3 | |||
8af8b047fd | |||
6592078fef | |||
ad971ca07b | |||
f2f16eb972 | |||
b7d55f7dfb | |||
6e548a8ad5 | |||
a5dd4a9bab | |||
ec232af851 | |||
17e30281e9 | |||
2854e601e6 | |||
aee44dc694 | |||
75bbf6a0af | |||
c65d23935a | |||
92cd797636 | |||
111bfbc6bc | |||
f41a230b32 | |||
c067051d5f | |||
afdeb2b13c | |||
9e2ec2719b | |||
757a928f9a | |||
bc248b6948 | |||
ae8561892e | |||
32375aca65 | |||
bb94ddd0eb | |||
c2d78493c8 | |||
7f0fc0eff5 | |||
791cb050c8 | |||
d5e95bc350 | |||
6efac3a252 | |||
7a84906b5f | |||
07416e4567 | |||
66d832c733 | |||
e74417ca12 | |||
7bd0084b5d | |||
e8c3174ae2 | |||
9b066e94d0 | |||
618abdf302 | |||
e1042aef77 | |||
aa6a839c60 | |||
ac99a56237 | |||
b4d2af8c89 | |||
434af6aeaa | |||
e90f8ac841 | |||
a1705a8d53 | |||
ca21003f01 | |||
14ddf2c234 | |||
bca861e112 | |||
33d199a0ad | |||
93896ce59e | |||
b1508e4124 | |||
b820076b91 | |||
09f66100d3 | |||
d7d92af09d | |||
460d0753a1 | |||
8f8058f8a5 | |||
d97a27f483 | |||
7c3363b91e | |||
b94478fa51 | |||
13bf0482e3 | |||
a795b5705e | |||
392e064513 | |||
b6a65059a2 | |||
ea25a4d9ac | |||
c190221fd3 | |||
0fcd2e7188 | |||
910b8dd6a1 | |||
75ebd3a0d1 | |||
7c8f79b147 | |||
09fd5c43a7 | |||
462921e549 | |||
f22317748f | |||
6a9eae6b6b | |||
fad96cf250 | |||
f331809c27 | |||
bd6a228af6 | |||
63d219498b | |||
2c54a53d0a | |||
306160ad9a | |||
20a091c3ed | |||
202078eb1b | |||
a762b1fb71 | |||
5b5925b8e5 | |||
b58adc6a4b | |||
f9d5e95d72 | |||
4f8e636a43 | |||
9b39f35ae6 | |||
5fe2b85cbd | |||
c7cccaaa69 | |||
cbcfea466f | |||
4955672fc3 | |||
39f1c880b8 | |||
8c043da5b7 | |||
3cbe974eb4 | |||
997fd882ff | |||
7af9b87318 | |||
811ca45473 | |||
bc1a4d40ba | |||
c8079e6621 | |||
8b0d171c9a | |||
1f293b76b4 | |||
8bbd9ebc27 | |||
6472b431f0 | |||
bd205a3293 | |||
496beffa88 | |||
9b63e97108 | |||
81f2aeaece | |||
2d4a45c758 | |||
a123dcd7e9 | |||
6b27c42dfe | |||
f7c2aa3ba5 | |||
0f182f033b | |||
7240d73184 | |||
42cd148f5e | |||
6e01264bb7 | |||
6f408256bc | |||
8d11681aac | |||
3d5c9a1ee9 | |||
db749f103f | |||
dc389e467c | |||
3619167d62 | |||
96f1d1b828 | |||
657e0a8f4d | |||
616e7cd83e | |||
6f26d2e8d4 | |||
c014574504 | |||
d7ce164e6e | |||
c0d5b99016 | |||
09ca32d678 | |||
082ae350c6 | |||
611b5d74ba | |||
b56c9ffa52 | |||
70c32fa49b | |||
77c8a94dae | |||
2e453dfbf5 | |||
4089984431 | |||
98439847cf | |||
c78bbd0f8c | |||
7ea4b959a4 | |||
536e2ff073 | |||
798ff34d7e | |||
87acd06990 | |||
9353b6edfe | |||
167cc2650e | |||
34f887ca1c | |||
7089b6d5a5 | |||
2ba7d43ddd | |||
836e929565 | |||
b6713ecb60 | |||
52a39f0fcd | |||
81a7a03076 | |||
16b37b956c | |||
567b6cf23f | |||
296396646d | |||
04a437c92c | |||
5c190a1b8c | |||
15d8f5c88c | |||
c4ac6e7e8f | |||
510e340e16 | |||
6ffadca153 | |||
b6597b74e7 | |||
d2573189d8 | |||
65ca174dbb | |||
0724f7af75 | |||
2e74520821 | |||
6dd75ad9e5 | |||
fda408ee6f | |||
b9c80318a2 | |||
5df5d52d41 | |||
f76f281e58 | |||
aa20cc8b52 | |||
0fd179fb33 | |||
f45ef8d114 | |||
7422953e36 | |||
8535d433a7 | |||
b573d1f35a | |||
0c1d7e4daf | |||
02e983a0cd | |||
d15ab66aae | |||
9005b82c6d | |||
3475f45ce7 | |||
0744f38866 | |||
62febd2823 | |||
fd5614738d | |||
005dcc51aa | |||
655c893f86 | |||
843f5783b4 | |||
8986c9fedd | |||
c80a1d427c | |||
ae57032500 | |||
f75468728f | |||
5acd856663 | |||
b0d3e4bb2c | |||
b512ccbee6 | |||
8c89391c02 | |||
bfac5195b8 | |||
a782ca3238 | |||
744691097f | |||
ff6da364e8 | |||
4d11a6f5f2 | |||
88be3b39bb | |||
8a02824e08 | |||
356e7940fd | |||
73ce476890 | |||
29c4ef41de | |||
e423a09974 | |||
17097a93ec | |||
94a6373a7f | |||
4ab7dbfd57 | |||
90e70790f3 | |||
9c2e8d5e28 | |||
147e2025b9 | |||
573b8c6020 | |||
15218ec57f | |||
ec68e08dd2 | |||
fc25d2295c | |||
8dc2cfcedb | |||
836f93780c | |||
5a68715be3 | |||
32bc7a6ab8 | |||
b65e72e521 | |||
d1aaff65e8 | |||
93d29bb699 | |||
3b376ed54e | |||
d5c1f614ba | |||
2edc24225d | |||
629283726b | |||
6adb66dd08 | |||
5be92bb708 | |||
f4c049ea6d | |||
bc092ad30f | |||
dad642ed1b | |||
63ae39abc7 | |||
9e5b934d21 | |||
a7b483d67a | |||
bb99ce0680 | |||
83307df1af | |||
49b5c49851 | |||
e9f30cab2c | |||
089f0ab582 | |||
df6c9f55d1 | |||
b93e18ed50 | |||
9c77bb69a5 | |||
27f3ecc833 | |||
f9e90eeb1f | |||
fad5c675eb | |||
4908b77d46 | |||
f4dd5062d7 | |||
da34d75841 | |||
980ff18956 | |||
7edf4c6c04 | |||
1a6c7204ac | |||
49310fbab3 | |||
6049d5ac47 | |||
35d0d35238 | |||
c0e878705e | |||
5c0c8efb9e | |||
dfd714e1ef | |||
79a8ca1a62 | |||
fb45eb2eb2 | |||
a307274c96 | |||
3f2c44a5fe | |||
48fb1cdc11 | |||
8a79e93cc2 | |||
3493b51879 | |||
de3e79d300 | |||
dd62a61c5c | |||
8f47d0b5ab | |||
42af132dab | |||
9db2c6525d | |||
adbc7c1188 | |||
9dc345e8e8 | |||
8b9301a74c | |||
6f47fbb1e2 | |||
a9ae30f868 | |||
a3c0fb79b6 | |||
62601bb649 | |||
ef97e32152 | |||
daea5297ee | |||
5028969d4b | |||
c667d9fdcc | |||
7dbb94bab2 | |||
236dcc820b | |||
a42a441a6a | |||
a0676beeb1 | |||
c5106d0c03 | |||
fbf96b1bbb | |||
3c49ddfaa4 | |||
ffb8b3116c | |||
290493e162 | |||
dd8cfff111 | |||
184642adb0 | |||
4774a3bcd2 | |||
25fafa9a89 | |||
713520d3d2 | |||
85ed8175cb | |||
df5c788ef2 | |||
15f22425c8 | |||
e87182cf98 | |||
e3d5319470 | |||
ffedeb1c58 | |||
3e3b367aa9 | |||
3e80947c2b | |||
fdfbf11c6d | |||
9cb90f714e | |||
6ce174cd60 | |||
17ca5240f7 | |||
2daffdf95d | |||
149f826601 | |||
cd8ee27080 | |||
0fa66e8f3c | |||
8dd099267d | |||
1a6d65c6a4 | |||
fc4a043663 | |||
61ba50665e | |||
446c768cd3 | |||
bfe14000a9 | |||
092fa0d8da | |||
1ceff48133 | |||
680645f849 | |||
3fc6e03ad1 | |||
2d6614f3a1 | |||
4e041b5103 | |||
712b9a3489 | |||
bdaa5b1767 | |||
8fcefc021a | |||
1445189361 | |||
05c884a62a | |||
a25bec87d9 | |||
2d8bb4c594 | |||
51cb2d4328 | |||
6d58cb2a68 | |||
c8b35d960c | |||
532f41dd61 | |||
661b0ab45d | |||
565e9329ba | |||
4bc08ed995 | |||
b2933a0557 | |||
db057cc276 | |||
22e88eaf54 | |||
09fe3caebd | |||
5e02392f9c | |||
17a8f51a9b | |||
1b7f88dd00 | |||
d6737e4bd8 | |||
d539888e57 | |||
86187d7cca | |||
87418e7df1 | |||
55f65b81b5 | |||
d9408893b3 | |||
05acc22920 | |||
8ac021de73 | |||
e503ef5590 | |||
a7682b0060 | |||
d4c9d71fc8 | |||
786ca52c43 | |||
048ac04abc | |||
f78d89bcbe | |||
53d06046b0 | |||
5d3a1a025d | |||
139cc5f1ae | |||
1c0e922585 | |||
9d5f693cbe | |||
5c90c3b457 | |||
91e04056f9 | |||
3789e3f31c | |||
0c66719210 | |||
3a5b5c8bec | |||
fdbe071213 | |||
4bc21ec7cb | |||
e3083b6dfc | |||
ab89418658 | |||
28cd99882c | |||
aceaee774c | |||
f8f9fd6f22 | |||
101aa769eb | |||
0bf99bfde5 | |||
64bf6fe54e | |||
1161d566b9 | |||
c698b16d75 | |||
c4c89336fe | |||
fa59789580 | |||
92c2c7d3b5 | |||
e99ce0875f | |||
cc1d9eb05b | |||
57c027fea2 | |||
207dc439a7 | |||
77ef0bba48 | |||
999b3a2e26 | |||
7ee577eee6 | |||
d27ceb75dd | |||
65c2b794b5 | |||
de82b08f70 | |||
1d03f515b9 | |||
1c4c287925 | |||
10bbfdc3b2 | |||
e15f0b47c1 | |||
0fd0661be3 | |||
6628806142 | |||
17198a4abd | |||
465e6f01b7 | |||
0eec752216 | |||
122195384e | |||
2ae1c14c03 | |||
0ddb7e707b | |||
e2d8f67f63 | |||
0d99f62027 | |||
c23375cd65 | |||
a762a0d9ff | |||
f7ca6ca889 | |||
ec4a9b7f6c | |||
5341977948 | |||
f0aed4672e | |||
344d251fc4 | |||
f6c53e5039 | |||
ba09cbae3e | |||
6aa000176f | |||
23b6172c31 | |||
ca5eebe10c | |||
3f128443ab | |||
1e554350ac | |||
c79ea0dcef | |||
e3f141f82f | |||
a6dfa2386b | |||
d9b5e66877 | |||
8fd8bc25e9 | |||
ba427abde9 | |||
9b6ab6db16 | |||
806a83d38b | |||
7223753355 | |||
b27bac4669 | |||
c8a93d6a93 | |||
04072a5e1f | |||
574ea4f843 | |||
f2ae9682ff | |||
587f80cd93 | |||
528eb773ad | |||
e5657510b0 | |||
f473919526 | |||
8f1b0afc2a | |||
1494b0f397 | |||
ab56ccdd25 | |||
cf2f69812b | |||
339be37dba | |||
c323425496 | |||
a87b744621 | |||
a646260e82 | |||
af9c8d1372 | |||
650e02b344 | |||
a524ca2a4b | |||
23a7176b71 | |||
b1192a8908 | |||
e8dddb1596 | |||
97d0d56bcb | |||
c7ba47bdc7 | |||
7c7ea35ffb | |||
4b1cf580e0 | |||
e67fc2be18 | |||
f473ef7591 | |||
f7b1060aed | |||
8052556275 | |||
60d965f79e | |||
83b15bfcdd | |||
1ecbf9794d | |||
2ded354403 | |||
340428a1fe | |||
c77b7ee897 | |||
b6c3bc574b | |||
1e355a51e1 | |||
ad80f61fba | |||
61469252fe | |||
02198ac5b5 | |||
21abaf7e91 | |||
165bffc2e7 | |||
644fd6d32e | |||
f54e0ec9bd | |||
a155a362da | |||
60d4564151 | |||
d4e57f4bc6 | |||
3920b2c0ab | |||
2733c4b93c | |||
e17c773a0b | |||
36a800f26c | |||
b75da563d9 | |||
f9faec38be | |||
d6b64f47d9 | |||
a359f7a9f5 | |||
b606deb3f0 | |||
090e7aa930 | |||
2dce9c3cff | |||
1e72bd8b8c | |||
dc72293398 | |||
e55c35734b | |||
325e745daa | |||
61413565d0 | |||
ff129d9ad9 | |||
03fcd3b33a | |||
68b02da483 | |||
e051119769 | |||
2d8bb356e3 | |||
f3661aac4f | |||
a7251f28c7 | |||
1eb169ac0b | |||
5674c3e241 | |||
62c4ba0d1e | |||
497e7e4c53 | |||
19526d09c2 | |||
6aeaf6f568 | |||
40f2db9bc0 | |||
2cfa20cc4e | |||
a5f683d124 | |||
02a57ffa6f | |||
9f0d9ade68 | |||
c1b1b89d17 | |||
771235017d | |||
3425751cb8 | |||
db5e8050a8 | |||
a3fbabf404 | |||
22422a84d9 | |||
b6f6da923e | |||
c9fadf97a5 | |||
c650bb3f3d | |||
81395e85d1 | |||
340a29b735 | |||
f7be108e35 | |||
a0fc47c6f9 | |||
42a9ac71d2 | |||
41c2b09184 | |||
294dbf1bf0 | |||
9548c8b91f | |||
7f927a541c | |||
e2f73e3ead | |||
6371676a75 | |||
bd84c23298 | |||
7aa8d5e8af | |||
6012b0ec23 | |||
411ac49dd7 | |||
b8fb05a422 | |||
5c57d4f403 | |||
fc6ad65751 | |||
dafc74020c | |||
d19321dfde | |||
5924e5a562 | |||
c99d748da6 | |||
02452afd36 | |||
331768dcff | |||
4aac345bea | |||
15c0022042 | |||
aae8bf31a7 | |||
1e68b1c1bd | |||
491a708225 | |||
5a80930dd2 | |||
145a295231 | |||
841a37f941 | |||
e6cad3821c | |||
98de1cbb6a | |||
f7d61b8b81 | |||
78c4e862ef | |||
1e0be161e5 | |||
0afcf1cf13 | |||
08edbb5cbe | |||
0abfbcc8eb | |||
1b94253ba4 | |||
36e6f9ac7b | |||
2f41691c11 | |||
09bfe52840 | |||
8c9010d0f4 | |||
42c583265c | |||
539d698492 | |||
31ca609d12 | |||
5710966324 | |||
e108e708a3 | |||
6f0198d4d9 | |||
67ccb043f1 | |||
24a5a81c53 | |||
eb1759d7ea | |||
34a0fde2ad | |||
bc34b7e808 | |||
284453c5e9 | |||
77054bd61c | |||
f2b4edc090 | |||
fb81acca3c | |||
ee9ecb6115 |
32
.gitignore
vendored
32
.gitignore
vendored
@ -5,7 +5,6 @@
|
|||||||
*.o
|
*.o
|
||||||
*.obj
|
*.obj
|
||||||
|
|
||||||
|
|
||||||
# Editor files #
|
# Editor files #
|
||||||
################
|
################
|
||||||
*~
|
*~
|
||||||
@ -48,6 +47,7 @@ Config.h.in
|
|||||||
config.log
|
config.log
|
||||||
config.status
|
config.status
|
||||||
.deps
|
.deps
|
||||||
|
*.inc
|
||||||
|
|
||||||
# http://www.gnu.org/software/autoconf #
|
# http://www.gnu.org/software/autoconf #
|
||||||
########################################
|
########################################
|
||||||
@ -62,19 +62,8 @@ stamp-h1
|
|||||||
config.sub
|
config.sub
|
||||||
config.guess
|
config.guess
|
||||||
INSTALL
|
INSTALL
|
||||||
|
.dirstamp
|
||||||
# Packages #
|
ltmain.sh
|
||||||
############
|
|
||||||
# it's better to unpack these files and commit the raw source
|
|
||||||
# git has its own built in compression methods
|
|
||||||
*.7z
|
|
||||||
*.dmg
|
|
||||||
*.gz
|
|
||||||
*.iso
|
|
||||||
*.jar
|
|
||||||
*.rar
|
|
||||||
*.tar
|
|
||||||
*.zip
|
|
||||||
|
|
||||||
# Logs and databases #
|
# Logs and databases #
|
||||||
######################
|
######################
|
||||||
@ -94,9 +83,22 @@ Thumbs.db
|
|||||||
|
|
||||||
# build directory #
|
# build directory #
|
||||||
###################
|
###################
|
||||||
build/*
|
build*/*
|
||||||
|
|
||||||
# IDE related files #
|
# IDE related files #
|
||||||
#####################
|
#####################
|
||||||
*.xcodeproj/*
|
*.xcodeproj/*
|
||||||
build.sh
|
build.sh
|
||||||
|
|
||||||
|
# Eigen source #
|
||||||
|
################
|
||||||
|
lib/Eigen/*
|
||||||
|
|
||||||
|
# FFTW source #
|
||||||
|
################
|
||||||
|
lib/fftw/*
|
||||||
|
|
||||||
|
# libtool macros #
|
||||||
|
##################
|
||||||
|
m4/lt*
|
||||||
|
m4/libtool.m4
|
106
.travis.yml
Normal file
106
.travis.yml
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
language: cpp
|
||||||
|
|
||||||
|
cache:
|
||||||
|
directories:
|
||||||
|
- clang
|
||||||
|
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- os: osx
|
||||||
|
osx_image: xcode7.2
|
||||||
|
compiler: clang
|
||||||
|
- compiler: gcc
|
||||||
|
addons:
|
||||||
|
apt:
|
||||||
|
sources:
|
||||||
|
- ubuntu-toolchain-r-test
|
||||||
|
packages:
|
||||||
|
- g++-4.9
|
||||||
|
- libmpfr-dev
|
||||||
|
- libgmp-dev
|
||||||
|
- libmpc-dev
|
||||||
|
- libopenmpi-dev
|
||||||
|
- openmpi-bin
|
||||||
|
- binutils-dev
|
||||||
|
env: VERSION=-4.9
|
||||||
|
- compiler: gcc
|
||||||
|
addons:
|
||||||
|
apt:
|
||||||
|
sources:
|
||||||
|
- ubuntu-toolchain-r-test
|
||||||
|
packages:
|
||||||
|
- g++-5
|
||||||
|
- libmpfr-dev
|
||||||
|
- libgmp-dev
|
||||||
|
- libmpc-dev
|
||||||
|
- libopenmpi-dev
|
||||||
|
- openmpi-bin
|
||||||
|
- binutils-dev
|
||||||
|
env: VERSION=-5
|
||||||
|
- compiler: clang
|
||||||
|
addons:
|
||||||
|
apt:
|
||||||
|
sources:
|
||||||
|
- ubuntu-toolchain-r-test
|
||||||
|
packages:
|
||||||
|
- g++-4.8
|
||||||
|
- libmpfr-dev
|
||||||
|
- libgmp-dev
|
||||||
|
- libmpc-dev
|
||||||
|
- libopenmpi-dev
|
||||||
|
- openmpi-bin
|
||||||
|
- binutils-dev
|
||||||
|
env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
||||||
|
- compiler: clang
|
||||||
|
addons:
|
||||||
|
apt:
|
||||||
|
sources:
|
||||||
|
- ubuntu-toolchain-r-test
|
||||||
|
packages:
|
||||||
|
- g++-4.8
|
||||||
|
- libmpfr-dev
|
||||||
|
- libgmp-dev
|
||||||
|
- libmpc-dev
|
||||||
|
- libopenmpi-dev
|
||||||
|
- openmpi-bin
|
||||||
|
- binutils-dev
|
||||||
|
env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
||||||
|
|
||||||
|
before_install:
|
||||||
|
- export GRIDDIR=`pwd`
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]] && [ ! -e clang/bin ]; then wget $CLANG_LINK; tar -xf `basename $CLANG_LINK`; mkdir clang; mv clang+*/* clang/; fi
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
|
||||||
|
|
||||||
|
install:
|
||||||
|
- export CC=$CC$VERSION
|
||||||
|
- export CXX=$CXX$VERSION
|
||||||
|
- echo $PATH
|
||||||
|
- which $CC
|
||||||
|
- $CC --version
|
||||||
|
- which $CXX
|
||||||
|
- $CXX --version
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
|
||||||
|
|
||||||
|
script:
|
||||||
|
- ./bootstrap.sh
|
||||||
|
- mkdir build
|
||||||
|
- cd build
|
||||||
|
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
|
||||||
|
- make -j4
|
||||||
|
- ./benchmarks/Benchmark_dwf --threads 1
|
||||||
|
- echo make clean
|
||||||
|
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
|
||||||
|
- make -j4
|
||||||
|
- ./benchmarks/Benchmark_dwf --threads 1
|
||||||
|
- echo make clean
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
|
||||||
|
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto
|
||||||
|
- make -j4
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
|
||||||
|
|
9
AUTHORS
9
AUTHORS
@ -1,5 +1,4 @@
|
|||||||
Peter Boyle
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Azusa Yamaguchi
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Intel Parallel Computing Centre @ Higgs Centre for Theoretical Physics
|
Author: Peter Boyle <peterboyle@MacBook-Pro.local>
|
||||||
University of Edinburgh
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
Scotland, UK
|
|
||||||
|
876
COPYING
876
COPYING
@ -1,622 +1,281 @@
|
|||||||
GNU GENERAL PUBLIC LICENSE
|
GNU GENERAL PUBLIC LICENSE
|
||||||
Version 3, 29 June 2007
|
Version 2, June 1991
|
||||||
|
|
||||||
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
Everyone is permitted to copy and distribute verbatim copies
|
Everyone is permitted to copy and distribute verbatim copies
|
||||||
of this license document, but changing it is not allowed.
|
of this license document, but changing it is not allowed.
|
||||||
|
|
||||||
Preamble
|
Preamble
|
||||||
|
|
||||||
The GNU General Public License is a free, copyleft license for
|
The licenses for most software are designed to take away your
|
||||||
software and other kinds of works.
|
freedom to share and change it. By contrast, the GNU General Public
|
||||||
|
License is intended to guarantee your freedom to share and change free
|
||||||
The licenses for most software and other practical works are designed
|
software--to make sure the software is free for all its users. This
|
||||||
to take away your freedom to share and change the works. By contrast,
|
General Public License applies to most of the Free Software
|
||||||
the GNU General Public License is intended to guarantee your freedom to
|
Foundation's software and to any other program whose authors commit to
|
||||||
share and change all versions of a program--to make sure it remains free
|
using it. (Some other Free Software Foundation software is covered by
|
||||||
software for all its users. We, the Free Software Foundation, use the
|
the GNU Lesser General Public License instead.) You can apply it to
|
||||||
GNU General Public License for most of our software; it applies also to
|
|
||||||
any other work released this way by its authors. You can apply it to
|
|
||||||
your programs, too.
|
your programs, too.
|
||||||
|
|
||||||
When we speak of free software, we are referring to freedom, not
|
When we speak of free software, we are referring to freedom, not
|
||||||
price. Our General Public Licenses are designed to make sure that you
|
price. Our General Public Licenses are designed to make sure that you
|
||||||
have the freedom to distribute copies of free software (and charge for
|
have the freedom to distribute copies of free software (and charge for
|
||||||
them if you wish), that you receive source code or can get it if you
|
this service if you wish), that you receive source code or can get it
|
||||||
want it, that you can change the software or use pieces of it in new
|
if you want it, that you can change the software or use pieces of it
|
||||||
free programs, and that you know you can do these things.
|
in new free programs; and that you know you can do these things.
|
||||||
|
|
||||||
To protect your rights, we need to prevent others from denying you
|
To protect your rights, we need to make restrictions that forbid
|
||||||
these rights or asking you to surrender the rights. Therefore, you have
|
anyone to deny you these rights or to ask you to surrender the rights.
|
||||||
certain responsibilities if you distribute copies of the software, or if
|
These restrictions translate to certain responsibilities for you if you
|
||||||
you modify it: responsibilities to respect the freedom of others.
|
distribute copies of the software, or if you modify it.
|
||||||
|
|
||||||
For example, if you distribute copies of such a program, whether
|
For example, if you distribute copies of such a program, whether
|
||||||
gratis or for a fee, you must pass on to the recipients the same
|
gratis or for a fee, you must give the recipients all the rights that
|
||||||
freedoms that you received. You must make sure that they, too, receive
|
you have. You must make sure that they, too, receive or can get the
|
||||||
or can get the source code. And you must show them these terms so they
|
source code. And you must show them these terms so they know their
|
||||||
know their rights.
|
rights.
|
||||||
|
|
||||||
Developers that use the GNU GPL protect your rights with two steps:
|
We protect your rights with two steps: (1) copyright the software, and
|
||||||
(1) assert copyright on the software, and (2) offer you this License
|
(2) offer you this license which gives you legal permission to copy,
|
||||||
giving you legal permission to copy, distribute and/or modify it.
|
distribute and/or modify the software.
|
||||||
|
|
||||||
For the developers' and authors' protection, the GPL clearly explains
|
Also, for each author's protection and ours, we want to make certain
|
||||||
that there is no warranty for this free software. For both users' and
|
that everyone understands that there is no warranty for this free
|
||||||
authors' sake, the GPL requires that modified versions be marked as
|
software. If the software is modified by someone else and passed on, we
|
||||||
changed, so that their problems will not be attributed erroneously to
|
want its recipients to know that what they have is not the original, so
|
||||||
authors of previous versions.
|
that any problems introduced by others will not reflect on the original
|
||||||
|
authors' reputations.
|
||||||
|
|
||||||
Some devices are designed to deny users access to install or run
|
Finally, any free program is threatened constantly by software
|
||||||
modified versions of the software inside them, although the manufacturer
|
patents. We wish to avoid the danger that redistributors of a free
|
||||||
can do so. This is fundamentally incompatible with the aim of
|
program will individually obtain patent licenses, in effect making the
|
||||||
protecting users' freedom to change the software. The systematic
|
program proprietary. To prevent this, we have made it clear that any
|
||||||
pattern of such abuse occurs in the area of products for individuals to
|
patent must be licensed for everyone's free use or not licensed at all.
|
||||||
use, which is precisely where it is most unacceptable. Therefore, we
|
|
||||||
have designed this version of the GPL to prohibit the practice for those
|
|
||||||
products. If such problems arise substantially in other domains, we
|
|
||||||
stand ready to extend this provision to those domains in future versions
|
|
||||||
of the GPL, as needed to protect the freedom of users.
|
|
||||||
|
|
||||||
Finally, every program is threatened constantly by software patents.
|
|
||||||
States should not allow patents to restrict development and use of
|
|
||||||
software on general-purpose computers, but in those that do, we wish to
|
|
||||||
avoid the special danger that patents applied to a free program could
|
|
||||||
make it effectively proprietary. To prevent this, the GPL assures that
|
|
||||||
patents cannot be used to render the program non-free.
|
|
||||||
|
|
||||||
The precise terms and conditions for copying, distribution and
|
The precise terms and conditions for copying, distribution and
|
||||||
modification follow.
|
modification follow.
|
||||||
|
|
||||||
TERMS AND CONDITIONS
|
GNU GENERAL PUBLIC LICENSE
|
||||||
|
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||||
0. Definitions.
|
|
||||||
|
0. This License applies to any program or other work which contains
|
||||||
"This License" refers to version 3 of the GNU General Public License.
|
a notice placed by the copyright holder saying it may be distributed
|
||||||
|
under the terms of this General Public License. The "Program", below,
|
||||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
refers to any such program or work, and a "work based on the Program"
|
||||||
works, such as semiconductor masks.
|
means either the Program or any derivative work under copyright law:
|
||||||
|
that is to say, a work containing the Program or a portion of it,
|
||||||
"The Program" refers to any copyrightable work licensed under this
|
either verbatim or with modifications and/or translated into another
|
||||||
License. Each licensee is addressed as "you". "Licensees" and
|
language. (Hereinafter, translation is included without limitation in
|
||||||
"recipients" may be individuals or organizations.
|
the term "modification".) Each licensee is addressed as "you".
|
||||||
|
|
||||||
To "modify" a work means to copy from or adapt all or part of the work
|
Activities other than copying, distribution and modification are not
|
||||||
in a fashion requiring copyright permission, other than the making of an
|
covered by this License; they are outside its scope. The act of
|
||||||
exact copy. The resulting work is called a "modified version" of the
|
running the Program is not restricted, and the output from the Program
|
||||||
earlier work or a work "based on" the earlier work.
|
is covered only if its contents constitute a work based on the
|
||||||
|
Program (independent of having been made by running the Program).
|
||||||
A "covered work" means either the unmodified Program or a work based
|
Whether that is true depends on what the Program does.
|
||||||
on the Program.
|
|
||||||
|
1. You may copy and distribute verbatim copies of the Program's
|
||||||
To "propagate" a work means to do anything with it that, without
|
source code as you receive it, in any medium, provided that you
|
||||||
permission, would make you directly or secondarily liable for
|
conspicuously and appropriately publish on each copy an appropriate
|
||||||
infringement under applicable copyright law, except executing it on a
|
copyright notice and disclaimer of warranty; keep intact all the
|
||||||
computer or modifying a private copy. Propagation includes copying,
|
notices that refer to this License and to the absence of any warranty;
|
||||||
distribution (with or without modification), making available to the
|
and give any other recipients of the Program a copy of this License
|
||||||
public, and in some countries other activities as well.
|
along with the Program.
|
||||||
|
|
||||||
To "convey" a work means any kind of propagation that enables other
|
You may charge a fee for the physical act of transferring a copy, and
|
||||||
parties to make or receive copies. Mere interaction with a user through
|
you may at your option offer warranty protection in exchange for a fee.
|
||||||
a computer network, with no transfer of a copy, is not conveying.
|
|
||||||
|
2. You may modify your copy or copies of the Program or any portion
|
||||||
An interactive user interface displays "Appropriate Legal Notices"
|
of it, thus forming a work based on the Program, and copy and
|
||||||
to the extent that it includes a convenient and prominently visible
|
distribute such modifications or work under the terms of Section 1
|
||||||
feature that (1) displays an appropriate copyright notice, and (2)
|
above, provided that you also meet all of these conditions:
|
||||||
tells the user that there is no warranty for the work (except to the
|
|
||||||
extent that warranties are provided), that licensees may convey the
|
a) You must cause the modified files to carry prominent notices
|
||||||
work under this License, and how to view a copy of this License. If
|
stating that you changed the files and the date of any change.
|
||||||
the interface presents a list of user commands or options, such as a
|
|
||||||
menu, a prominent item in the list meets this criterion.
|
b) You must cause any work that you distribute or publish, that in
|
||||||
|
whole or in part contains or is derived from the Program or any
|
||||||
1. Source Code.
|
part thereof, to be licensed as a whole at no charge to all third
|
||||||
|
parties under the terms of this License.
|
||||||
The "source code" for a work means the preferred form of the work
|
|
||||||
for making modifications to it. "Object code" means any non-source
|
c) If the modified program normally reads commands interactively
|
||||||
form of a work.
|
when run, you must cause it, when started running for such
|
||||||
|
interactive use in the most ordinary way, to print or display an
|
||||||
A "Standard Interface" means an interface that either is an official
|
announcement including an appropriate copyright notice and a
|
||||||
standard defined by a recognized standards body, or, in the case of
|
notice that there is no warranty (or else, saying that you provide
|
||||||
interfaces specified for a particular programming language, one that
|
a warranty) and that users may redistribute the program under
|
||||||
is widely used among developers working in that language.
|
these conditions, and telling the user how to view a copy of this
|
||||||
|
License. (Exception: if the Program itself is interactive but
|
||||||
The "System Libraries" of an executable work include anything, other
|
does not normally print such an announcement, your work based on
|
||||||
than the work as a whole, that (a) is included in the normal form of
|
the Program is not required to print an announcement.)
|
||||||
packaging a Major Component, but which is not part of that Major
|
|
||||||
Component, and (b) serves only to enable use of the work with that
|
These requirements apply to the modified work as a whole. If
|
||||||
Major Component, or to implement a Standard Interface for which an
|
identifiable sections of that work are not derived from the Program,
|
||||||
implementation is available to the public in source code form. A
|
and can be reasonably considered independent and separate works in
|
||||||
"Major Component", in this context, means a major essential component
|
themselves, then this License, and its terms, do not apply to those
|
||||||
(kernel, window system, and so on) of the specific operating system
|
sections when you distribute them as separate works. But when you
|
||||||
(if any) on which the executable work runs, or a compiler used to
|
distribute the same sections as part of a whole which is a work based
|
||||||
produce the work, or an object code interpreter used to run it.
|
on the Program, the distribution of the whole must be on the terms of
|
||||||
|
this License, whose permissions for other licensees extend to the
|
||||||
The "Corresponding Source" for a work in object code form means all
|
entire whole, and thus to each and every part regardless of who wrote it.
|
||||||
the source code needed to generate, install, and (for an executable
|
|
||||||
work) run the object code and to modify the work, including scripts to
|
Thus, it is not the intent of this section to claim rights or contest
|
||||||
control those activities. However, it does not include the work's
|
your rights to work written entirely by you; rather, the intent is to
|
||||||
System Libraries, or general-purpose tools or generally available free
|
exercise the right to control the distribution of derivative or
|
||||||
programs which are used unmodified in performing those activities but
|
collective works based on the Program.
|
||||||
which are not part of the work. For example, Corresponding Source
|
|
||||||
includes interface definition files associated with source files for
|
In addition, mere aggregation of another work not based on the Program
|
||||||
the work, and the source code for shared libraries and dynamically
|
with the Program (or with a work based on the Program) on a volume of
|
||||||
linked subprograms that the work is specifically designed to require,
|
a storage or distribution medium does not bring the other work under
|
||||||
such as by intimate data communication or control flow between those
|
the scope of this License.
|
||||||
subprograms and other parts of the work.
|
|
||||||
|
3. You may copy and distribute the Program (or a work based on it,
|
||||||
The Corresponding Source need not include anything that users
|
under Section 2) in object code or executable form under the terms of
|
||||||
can regenerate automatically from other parts of the Corresponding
|
Sections 1 and 2 above provided that you also do one of the following:
|
||||||
Source.
|
|
||||||
|
a) Accompany it with the complete corresponding machine-readable
|
||||||
The Corresponding Source for a work in source code form is that
|
source code, which must be distributed under the terms of Sections
|
||||||
same work.
|
1 and 2 above on a medium customarily used for software interchange; or,
|
||||||
|
|
||||||
2. Basic Permissions.
|
b) Accompany it with a written offer, valid for at least three
|
||||||
|
years, to give any third party, for a charge no more than your
|
||||||
All rights granted under this License are granted for the term of
|
cost of physically performing source distribution, a complete
|
||||||
copyright on the Program, and are irrevocable provided the stated
|
machine-readable copy of the corresponding source code, to be
|
||||||
conditions are met. This License explicitly affirms your unlimited
|
distributed under the terms of Sections 1 and 2 above on a medium
|
||||||
permission to run the unmodified Program. The output from running a
|
customarily used for software interchange; or,
|
||||||
covered work is covered by this License only if the output, given its
|
|
||||||
content, constitutes a covered work. This License acknowledges your
|
c) Accompany it with the information you received as to the offer
|
||||||
rights of fair use or other equivalent, as provided by copyright law.
|
to distribute corresponding source code. (This alternative is
|
||||||
|
allowed only for noncommercial distribution and only if you
|
||||||
You may make, run and propagate covered works that you do not
|
received the program in object code or executable form with such
|
||||||
convey, without conditions so long as your license otherwise remains
|
an offer, in accord with Subsection b above.)
|
||||||
in force. You may convey covered works to others for the sole purpose
|
|
||||||
of having them make modifications exclusively for you, or provide you
|
The source code for a work means the preferred form of the work for
|
||||||
with facilities for running those works, provided that you comply with
|
making modifications to it. For an executable work, complete source
|
||||||
the terms of this License in conveying all material for which you do
|
code means all the source code for all modules it contains, plus any
|
||||||
not control copyright. Those thus making or running the covered works
|
associated interface definition files, plus the scripts used to
|
||||||
for you must do so exclusively on your behalf, under your direction
|
control compilation and installation of the executable. However, as a
|
||||||
and control, on terms that prohibit them from making any copies of
|
special exception, the source code distributed need not include
|
||||||
your copyrighted material outside their relationship with you.
|
anything that is normally distributed (in either source or binary
|
||||||
|
form) with the major components (compiler, kernel, and so on) of the
|
||||||
Conveying under any other circumstances is permitted solely under
|
operating system on which the executable runs, unless that component
|
||||||
the conditions stated below. Sublicensing is not allowed; section 10
|
itself accompanies the executable.
|
||||||
makes it unnecessary.
|
|
||||||
|
If distribution of executable or object code is made by offering
|
||||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
access to copy from a designated place, then offering equivalent
|
||||||
|
access to copy the source code from the same place counts as
|
||||||
No covered work shall be deemed part of an effective technological
|
distribution of the source code, even though third parties are not
|
||||||
measure under any applicable law fulfilling obligations under article
|
compelled to copy the source along with the object code.
|
||||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
|
||||||
similar laws prohibiting or restricting circumvention of such
|
4. You may not copy, modify, sublicense, or distribute the Program
|
||||||
measures.
|
except as expressly provided under this License. Any attempt
|
||||||
|
otherwise to copy, modify, sublicense or distribute the Program is
|
||||||
When you convey a covered work, you waive any legal power to forbid
|
void, and will automatically terminate your rights under this License.
|
||||||
circumvention of technological measures to the extent such circumvention
|
However, parties who have received copies, or rights, from you under
|
||||||
is effected by exercising rights under this License with respect to
|
this License will not have their licenses terminated so long as such
|
||||||
the covered work, and you disclaim any intention to limit operation or
|
parties remain in full compliance.
|
||||||
modification of the work as a means of enforcing, against the work's
|
|
||||||
users, your or third parties' legal rights to forbid circumvention of
|
5. You are not required to accept this License, since you have not
|
||||||
technological measures.
|
signed it. However, nothing else grants you permission to modify or
|
||||||
|
distribute the Program or its derivative works. These actions are
|
||||||
4. Conveying Verbatim Copies.
|
prohibited by law if you do not accept this License. Therefore, by
|
||||||
|
modifying or distributing the Program (or any work based on the
|
||||||
You may convey verbatim copies of the Program's source code as you
|
Program), you indicate your acceptance of this License to do so, and
|
||||||
receive it, in any medium, provided that you conspicuously and
|
all its terms and conditions for copying, distributing or modifying
|
||||||
appropriately publish on each copy an appropriate copyright notice;
|
the Program or works based on it.
|
||||||
keep intact all notices stating that this License and any
|
|
||||||
non-permissive terms added in accord with section 7 apply to the code;
|
6. Each time you redistribute the Program (or any work based on the
|
||||||
keep intact all notices of the absence of any warranty; and give all
|
Program), the recipient automatically receives a license from the
|
||||||
recipients a copy of this License along with the Program.
|
original licensor to copy, distribute or modify the Program subject to
|
||||||
|
these terms and conditions. You may not impose any further
|
||||||
You may charge any price or no price for each copy that you convey,
|
restrictions on the recipients' exercise of the rights granted herein.
|
||||||
and you may offer support or warranty protection for a fee.
|
You are not responsible for enforcing compliance by third parties to
|
||||||
|
|
||||||
5. Conveying Modified Source Versions.
|
|
||||||
|
|
||||||
You may convey a work based on the Program, or the modifications to
|
|
||||||
produce it from the Program, in the form of source code under the
|
|
||||||
terms of section 4, provided that you also meet all of these conditions:
|
|
||||||
|
|
||||||
a) The work must carry prominent notices stating that you modified
|
|
||||||
it, and giving a relevant date.
|
|
||||||
|
|
||||||
b) The work must carry prominent notices stating that it is
|
|
||||||
released under this License and any conditions added under section
|
|
||||||
7. This requirement modifies the requirement in section 4 to
|
|
||||||
"keep intact all notices".
|
|
||||||
|
|
||||||
c) You must license the entire work, as a whole, under this
|
|
||||||
License to anyone who comes into possession of a copy. This
|
|
||||||
License will therefore apply, along with any applicable section 7
|
|
||||||
additional terms, to the whole of the work, and all its parts,
|
|
||||||
regardless of how they are packaged. This License gives no
|
|
||||||
permission to license the work in any other way, but it does not
|
|
||||||
invalidate such permission if you have separately received it.
|
|
||||||
|
|
||||||
d) If the work has interactive user interfaces, each must display
|
|
||||||
Appropriate Legal Notices; however, if the Program has interactive
|
|
||||||
interfaces that do not display Appropriate Legal Notices, your
|
|
||||||
work need not make them do so.
|
|
||||||
|
|
||||||
A compilation of a covered work with other separate and independent
|
|
||||||
works, which are not by their nature extensions of the covered work,
|
|
||||||
and which are not combined with it such as to form a larger program,
|
|
||||||
in or on a volume of a storage or distribution medium, is called an
|
|
||||||
"aggregate" if the compilation and its resulting copyright are not
|
|
||||||
used to limit the access or legal rights of the compilation's users
|
|
||||||
beyond what the individual works permit. Inclusion of a covered work
|
|
||||||
in an aggregate does not cause this License to apply to the other
|
|
||||||
parts of the aggregate.
|
|
||||||
|
|
||||||
6. Conveying Non-Source Forms.
|
|
||||||
|
|
||||||
You may convey a covered work in object code form under the terms
|
|
||||||
of sections 4 and 5, provided that you also convey the
|
|
||||||
machine-readable Corresponding Source under the terms of this License,
|
|
||||||
in one of these ways:
|
|
||||||
|
|
||||||
a) Convey the object code in, or embodied in, a physical product
|
|
||||||
(including a physical distribution medium), accompanied by the
|
|
||||||
Corresponding Source fixed on a durable physical medium
|
|
||||||
customarily used for software interchange.
|
|
||||||
|
|
||||||
b) Convey the object code in, or embodied in, a physical product
|
|
||||||
(including a physical distribution medium), accompanied by a
|
|
||||||
written offer, valid for at least three years and valid for as
|
|
||||||
long as you offer spare parts or customer support for that product
|
|
||||||
model, to give anyone who possesses the object code either (1) a
|
|
||||||
copy of the Corresponding Source for all the software in the
|
|
||||||
product that is covered by this License, on a durable physical
|
|
||||||
medium customarily used for software interchange, for a price no
|
|
||||||
more than your reasonable cost of physically performing this
|
|
||||||
conveying of source, or (2) access to copy the
|
|
||||||
Corresponding Source from a network server at no charge.
|
|
||||||
|
|
||||||
c) Convey individual copies of the object code with a copy of the
|
|
||||||
written offer to provide the Corresponding Source. This
|
|
||||||
alternative is allowed only occasionally and noncommercially, and
|
|
||||||
only if you received the object code with such an offer, in accord
|
|
||||||
with subsection 6b.
|
|
||||||
|
|
||||||
d) Convey the object code by offering access from a designated
|
|
||||||
place (gratis or for a charge), and offer equivalent access to the
|
|
||||||
Corresponding Source in the same way through the same place at no
|
|
||||||
further charge. You need not require recipients to copy the
|
|
||||||
Corresponding Source along with the object code. If the place to
|
|
||||||
copy the object code is a network server, the Corresponding Source
|
|
||||||
may be on a different server (operated by you or a third party)
|
|
||||||
that supports equivalent copying facilities, provided you maintain
|
|
||||||
clear directions next to the object code saying where to find the
|
|
||||||
Corresponding Source. Regardless of what server hosts the
|
|
||||||
Corresponding Source, you remain obligated to ensure that it is
|
|
||||||
available for as long as needed to satisfy these requirements.
|
|
||||||
|
|
||||||
e) Convey the object code using peer-to-peer transmission, provided
|
|
||||||
you inform other peers where the object code and Corresponding
|
|
||||||
Source of the work are being offered to the general public at no
|
|
||||||
charge under subsection 6d.
|
|
||||||
|
|
||||||
A separable portion of the object code, whose source code is excluded
|
|
||||||
from the Corresponding Source as a System Library, need not be
|
|
||||||
included in conveying the object code work.
|
|
||||||
|
|
||||||
A "User Product" is either (1) a "consumer product", which means any
|
|
||||||
tangible personal property which is normally used for personal, family,
|
|
||||||
or household purposes, or (2) anything designed or sold for incorporation
|
|
||||||
into a dwelling. In determining whether a product is a consumer product,
|
|
||||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
|
||||||
product received by a particular user, "normally used" refers to a
|
|
||||||
typical or common use of that class of product, regardless of the status
|
|
||||||
of the particular user or of the way in which the particular user
|
|
||||||
actually uses, or expects or is expected to use, the product. A product
|
|
||||||
is a consumer product regardless of whether the product has substantial
|
|
||||||
commercial, industrial or non-consumer uses, unless such uses represent
|
|
||||||
the only significant mode of use of the product.
|
|
||||||
|
|
||||||
"Installation Information" for a User Product means any methods,
|
|
||||||
procedures, authorization keys, or other information required to install
|
|
||||||
and execute modified versions of a covered work in that User Product from
|
|
||||||
a modified version of its Corresponding Source. The information must
|
|
||||||
suffice to ensure that the continued functioning of the modified object
|
|
||||||
code is in no case prevented or interfered with solely because
|
|
||||||
modification has been made.
|
|
||||||
|
|
||||||
If you convey an object code work under this section in, or with, or
|
|
||||||
specifically for use in, a User Product, and the conveying occurs as
|
|
||||||
part of a transaction in which the right of possession and use of the
|
|
||||||
User Product is transferred to the recipient in perpetuity or for a
|
|
||||||
fixed term (regardless of how the transaction is characterized), the
|
|
||||||
Corresponding Source conveyed under this section must be accompanied
|
|
||||||
by the Installation Information. But this requirement does not apply
|
|
||||||
if neither you nor any third party retains the ability to install
|
|
||||||
modified object code on the User Product (for example, the work has
|
|
||||||
been installed in ROM).
|
|
||||||
|
|
||||||
The requirement to provide Installation Information does not include a
|
|
||||||
requirement to continue to provide support service, warranty, or updates
|
|
||||||
for a work that has been modified or installed by the recipient, or for
|
|
||||||
the User Product in which it has been modified or installed. Access to a
|
|
||||||
network may be denied when the modification itself materially and
|
|
||||||
adversely affects the operation of the network or violates the rules and
|
|
||||||
protocols for communication across the network.
|
|
||||||
|
|
||||||
Corresponding Source conveyed, and Installation Information provided,
|
|
||||||
in accord with this section must be in a format that is publicly
|
|
||||||
documented (and with an implementation available to the public in
|
|
||||||
source code form), and must require no special password or key for
|
|
||||||
unpacking, reading or copying.
|
|
||||||
|
|
||||||
7. Additional Terms.
|
|
||||||
|
|
||||||
"Additional permissions" are terms that supplement the terms of this
|
|
||||||
License by making exceptions from one or more of its conditions.
|
|
||||||
Additional permissions that are applicable to the entire Program shall
|
|
||||||
be treated as though they were included in this License, to the extent
|
|
||||||
that they are valid under applicable law. If additional permissions
|
|
||||||
apply only to part of the Program, that part may be used separately
|
|
||||||
under those permissions, but the entire Program remains governed by
|
|
||||||
this License without regard to the additional permissions.
|
|
||||||
|
|
||||||
When you convey a copy of a covered work, you may at your option
|
|
||||||
remove any additional permissions from that copy, or from any part of
|
|
||||||
it. (Additional permissions may be written to require their own
|
|
||||||
removal in certain cases when you modify the work.) You may place
|
|
||||||
additional permissions on material, added by you to a covered work,
|
|
||||||
for which you have or can give appropriate copyright permission.
|
|
||||||
|
|
||||||
Notwithstanding any other provision of this License, for material you
|
|
||||||
add to a covered work, you may (if authorized by the copyright holders of
|
|
||||||
that material) supplement the terms of this License with terms:
|
|
||||||
|
|
||||||
a) Disclaiming warranty or limiting liability differently from the
|
|
||||||
terms of sections 15 and 16 of this License; or
|
|
||||||
|
|
||||||
b) Requiring preservation of specified reasonable legal notices or
|
|
||||||
author attributions in that material or in the Appropriate Legal
|
|
||||||
Notices displayed by works containing it; or
|
|
||||||
|
|
||||||
c) Prohibiting misrepresentation of the origin of that material, or
|
|
||||||
requiring that modified versions of such material be marked in
|
|
||||||
reasonable ways as different from the original version; or
|
|
||||||
|
|
||||||
d) Limiting the use for publicity purposes of names of licensors or
|
|
||||||
authors of the material; or
|
|
||||||
|
|
||||||
e) Declining to grant rights under trademark law for use of some
|
|
||||||
trade names, trademarks, or service marks; or
|
|
||||||
|
|
||||||
f) Requiring indemnification of licensors and authors of that
|
|
||||||
material by anyone who conveys the material (or modified versions of
|
|
||||||
it) with contractual assumptions of liability to the recipient, for
|
|
||||||
any liability that these contractual assumptions directly impose on
|
|
||||||
those licensors and authors.
|
|
||||||
|
|
||||||
All other non-permissive additional terms are considered "further
|
|
||||||
restrictions" within the meaning of section 10. If the Program as you
|
|
||||||
received it, or any part of it, contains a notice stating that it is
|
|
||||||
governed by this License along with a term that is a further
|
|
||||||
restriction, you may remove that term. If a license document contains
|
|
||||||
a further restriction but permits relicensing or conveying under this
|
|
||||||
License, you may add to a covered work material governed by the terms
|
|
||||||
of that license document, provided that the further restriction does
|
|
||||||
not survive such relicensing or conveying.
|
|
||||||
|
|
||||||
If you add terms to a covered work in accord with this section, you
|
|
||||||
must place, in the relevant source files, a statement of the
|
|
||||||
additional terms that apply to those files, or a notice indicating
|
|
||||||
where to find the applicable terms.
|
|
||||||
|
|
||||||
Additional terms, permissive or non-permissive, may be stated in the
|
|
||||||
form of a separately written license, or stated as exceptions;
|
|
||||||
the above requirements apply either way.
|
|
||||||
|
|
||||||
8. Termination.
|
|
||||||
|
|
||||||
You may not propagate or modify a covered work except as expressly
|
|
||||||
provided under this License. Any attempt otherwise to propagate or
|
|
||||||
modify it is void, and will automatically terminate your rights under
|
|
||||||
this License (including any patent licenses granted under the third
|
|
||||||
paragraph of section 11).
|
|
||||||
|
|
||||||
However, if you cease all violation of this License, then your
|
|
||||||
license from a particular copyright holder is reinstated (a)
|
|
||||||
provisionally, unless and until the copyright holder explicitly and
|
|
||||||
finally terminates your license, and (b) permanently, if the copyright
|
|
||||||
holder fails to notify you of the violation by some reasonable means
|
|
||||||
prior to 60 days after the cessation.
|
|
||||||
|
|
||||||
Moreover, your license from a particular copyright holder is
|
|
||||||
reinstated permanently if the copyright holder notifies you of the
|
|
||||||
violation by some reasonable means, this is the first time you have
|
|
||||||
received notice of violation of this License (for any work) from that
|
|
||||||
copyright holder, and you cure the violation prior to 30 days after
|
|
||||||
your receipt of the notice.
|
|
||||||
|
|
||||||
Termination of your rights under this section does not terminate the
|
|
||||||
licenses of parties who have received copies or rights from you under
|
|
||||||
this License. If your rights have been terminated and not permanently
|
|
||||||
reinstated, you do not qualify to receive new licenses for the same
|
|
||||||
material under section 10.
|
|
||||||
|
|
||||||
9. Acceptance Not Required for Having Copies.
|
|
||||||
|
|
||||||
You are not required to accept this License in order to receive or
|
|
||||||
run a copy of the Program. Ancillary propagation of a covered work
|
|
||||||
occurring solely as a consequence of using peer-to-peer transmission
|
|
||||||
to receive a copy likewise does not require acceptance. However,
|
|
||||||
nothing other than this License grants you permission to propagate or
|
|
||||||
modify any covered work. These actions infringe copyright if you do
|
|
||||||
not accept this License. Therefore, by modifying or propagating a
|
|
||||||
covered work, you indicate your acceptance of this License to do so.
|
|
||||||
|
|
||||||
10. Automatic Licensing of Downstream Recipients.
|
|
||||||
|
|
||||||
Each time you convey a covered work, the recipient automatically
|
|
||||||
receives a license from the original licensors, to run, modify and
|
|
||||||
propagate that work, subject to this License. You are not responsible
|
|
||||||
for enforcing compliance by third parties with this License.
|
|
||||||
|
|
||||||
An "entity transaction" is a transaction transferring control of an
|
|
||||||
organization, or substantially all assets of one, or subdividing an
|
|
||||||
organization, or merging organizations. If propagation of a covered
|
|
||||||
work results from an entity transaction, each party to that
|
|
||||||
transaction who receives a copy of the work also receives whatever
|
|
||||||
licenses to the work the party's predecessor in interest had or could
|
|
||||||
give under the previous paragraph, plus a right to possession of the
|
|
||||||
Corresponding Source of the work from the predecessor in interest, if
|
|
||||||
the predecessor has it or can get it with reasonable efforts.
|
|
||||||
|
|
||||||
You may not impose any further restrictions on the exercise of the
|
|
||||||
rights granted or affirmed under this License. For example, you may
|
|
||||||
not impose a license fee, royalty, or other charge for exercise of
|
|
||||||
rights granted under this License, and you may not initiate litigation
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
any patent claim is infringed by making, using, selling, offering for
|
|
||||||
sale, or importing the Program or any portion of it.
|
|
||||||
|
|
||||||
11. Patents.
|
|
||||||
|
|
||||||
A "contributor" is a copyright holder who authorizes use under this
|
|
||||||
License of the Program or a work on which the Program is based. The
|
|
||||||
work thus licensed is called the contributor's "contributor version".
|
|
||||||
|
|
||||||
A contributor's "essential patent claims" are all patent claims
|
|
||||||
owned or controlled by the contributor, whether already acquired or
|
|
||||||
hereafter acquired, that would be infringed by some manner, permitted
|
|
||||||
by this License, of making, using, or selling its contributor version,
|
|
||||||
but do not include claims that would be infringed only as a
|
|
||||||
consequence of further modification of the contributor version. For
|
|
||||||
purposes of this definition, "control" includes the right to grant
|
|
||||||
patent sublicenses in a manner consistent with the requirements of
|
|
||||||
this License.
|
this License.
|
||||||
|
|
||||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
7. If, as a consequence of a court judgment or allegation of patent
|
||||||
patent license under the contributor's essential patent claims, to
|
infringement or for any other reason (not limited to patent issues),
|
||||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
conditions are imposed on you (whether by court order, agreement or
|
||||||
propagate the contents of its contributor version.
|
|
||||||
|
|
||||||
In the following three paragraphs, a "patent license" is any express
|
|
||||||
agreement or commitment, however denominated, not to enforce a patent
|
|
||||||
(such as an express permission to practice a patent or covenant not to
|
|
||||||
sue for patent infringement). To "grant" such a patent license to a
|
|
||||||
party means to make such an agreement or commitment not to enforce a
|
|
||||||
patent against the party.
|
|
||||||
|
|
||||||
If you convey a covered work, knowingly relying on a patent license,
|
|
||||||
and the Corresponding Source of the work is not available for anyone
|
|
||||||
to copy, free of charge and under the terms of this License, through a
|
|
||||||
publicly available network server or other readily accessible means,
|
|
||||||
then you must either (1) cause the Corresponding Source to be so
|
|
||||||
available, or (2) arrange to deprive yourself of the benefit of the
|
|
||||||
patent license for this particular work, or (3) arrange, in a manner
|
|
||||||
consistent with the requirements of this License, to extend the patent
|
|
||||||
license to downstream recipients. "Knowingly relying" means you have
|
|
||||||
actual knowledge that, but for the patent license, your conveying the
|
|
||||||
covered work in a country, or your recipient's use of the covered work
|
|
||||||
in a country, would infringe one or more identifiable patents in that
|
|
||||||
country that you have reason to believe are valid.
|
|
||||||
|
|
||||||
If, pursuant to or in connection with a single transaction or
|
|
||||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
|
||||||
covered work, and grant a patent license to some of the parties
|
|
||||||
receiving the covered work authorizing them to use, propagate, modify
|
|
||||||
or convey a specific copy of the covered work, then the patent license
|
|
||||||
you grant is automatically extended to all recipients of the covered
|
|
||||||
work and works based on it.
|
|
||||||
|
|
||||||
A patent license is "discriminatory" if it does not include within
|
|
||||||
the scope of its coverage, prohibits the exercise of, or is
|
|
||||||
conditioned on the non-exercise of one or more of the rights that are
|
|
||||||
specifically granted under this License. You may not convey a covered
|
|
||||||
work if you are a party to an arrangement with a third party that is
|
|
||||||
in the business of distributing software, under which you make payment
|
|
||||||
to the third party based on the extent of your activity of conveying
|
|
||||||
the work, and under which the third party grants, to any of the
|
|
||||||
parties who would receive the covered work from you, a discriminatory
|
|
||||||
patent license (a) in connection with copies of the covered work
|
|
||||||
conveyed by you (or copies made from those copies), or (b) primarily
|
|
||||||
for and in connection with specific products or compilations that
|
|
||||||
contain the covered work, unless you entered into that arrangement,
|
|
||||||
or that patent license was granted, prior to 28 March 2007.
|
|
||||||
|
|
||||||
Nothing in this License shall be construed as excluding or limiting
|
|
||||||
any implied license or other defenses to infringement that may
|
|
||||||
otherwise be available to you under applicable patent law.
|
|
||||||
|
|
||||||
12. No Surrender of Others' Freedom.
|
|
||||||
|
|
||||||
If conditions are imposed on you (whether by court order, agreement or
|
|
||||||
otherwise) that contradict the conditions of this License, they do not
|
otherwise) that contradict the conditions of this License, they do not
|
||||||
excuse you from the conditions of this License. If you cannot convey a
|
excuse you from the conditions of this License. If you cannot
|
||||||
covered work so as to satisfy simultaneously your obligations under this
|
distribute so as to satisfy simultaneously your obligations under this
|
||||||
License and any other pertinent obligations, then as a consequence you may
|
License and any other pertinent obligations, then as a consequence you
|
||||||
not convey it at all. For example, if you agree to terms that obligate you
|
may not distribute the Program at all. For example, if a patent
|
||||||
to collect a royalty for further conveying from those to whom you convey
|
license would not permit royalty-free redistribution of the Program by
|
||||||
the Program, the only way you could satisfy both those terms and this
|
all those who receive copies directly or indirectly through you, then
|
||||||
License would be to refrain entirely from conveying the Program.
|
the only way you could satisfy both it and this License would be to
|
||||||
|
refrain entirely from distribution of the Program.
|
||||||
|
|
||||||
13. Use with the GNU Affero General Public License.
|
If any portion of this section is held invalid or unenforceable under
|
||||||
|
any particular circumstance, the balance of the section is intended to
|
||||||
|
apply and the section as a whole is intended to apply in other
|
||||||
|
circumstances.
|
||||||
|
|
||||||
Notwithstanding any other provision of this License, you have
|
It is not the purpose of this section to induce you to infringe any
|
||||||
permission to link or combine any covered work with a work licensed
|
patents or other property right claims or to contest validity of any
|
||||||
under version 3 of the GNU Affero General Public License into a single
|
such claims; this section has the sole purpose of protecting the
|
||||||
combined work, and to convey the resulting work. The terms of this
|
integrity of the free software distribution system, which is
|
||||||
License will continue to apply to the part which is the covered work,
|
implemented by public license practices. Many people have made
|
||||||
but the special requirements of the GNU Affero General Public License,
|
generous contributions to the wide range of software distributed
|
||||||
section 13, concerning interaction through a network will apply to the
|
through that system in reliance on consistent application of that
|
||||||
combination as such.
|
system; it is up to the author/donor to decide if he or she is willing
|
||||||
|
to distribute software through any other system and a licensee cannot
|
||||||
|
impose that choice.
|
||||||
|
|
||||||
14. Revised Versions of this License.
|
This section is intended to make thoroughly clear what is believed to
|
||||||
|
be a consequence of the rest of this License.
|
||||||
|
|
||||||
The Free Software Foundation may publish revised and/or new versions of
|
8. If the distribution and/or use of the Program is restricted in
|
||||||
the GNU General Public License from time to time. Such new versions will
|
certain countries either by patents or by copyrighted interfaces, the
|
||||||
|
original copyright holder who places the Program under this License
|
||||||
|
may add an explicit geographical distribution limitation excluding
|
||||||
|
those countries, so that distribution is permitted only in or among
|
||||||
|
countries not thus excluded. In such case, this License incorporates
|
||||||
|
the limitation as if written in the body of this License.
|
||||||
|
|
||||||
|
9. The Free Software Foundation may publish revised and/or new versions
|
||||||
|
of the General Public License from time to time. Such new versions will
|
||||||
be similar in spirit to the present version, but may differ in detail to
|
be similar in spirit to the present version, but may differ in detail to
|
||||||
address new problems or concerns.
|
address new problems or concerns.
|
||||||
|
|
||||||
Each version is given a distinguishing version number. If the
|
Each version is given a distinguishing version number. If the Program
|
||||||
Program specifies that a certain numbered version of the GNU General
|
specifies a version number of this License which applies to it and "any
|
||||||
Public License "or any later version" applies to it, you have the
|
later version", you have the option of following the terms and conditions
|
||||||
option of following the terms and conditions either of that numbered
|
either of that version or of any later version published by the Free
|
||||||
version or of any later version published by the Free Software
|
Software Foundation. If the Program does not specify a version number of
|
||||||
Foundation. If the Program does not specify a version number of the
|
this License, you may choose any version ever published by the Free Software
|
||||||
GNU General Public License, you may choose any version ever published
|
Foundation.
|
||||||
by the Free Software Foundation.
|
|
||||||
|
|
||||||
If the Program specifies that a proxy can decide which future
|
10. If you wish to incorporate parts of the Program into other free
|
||||||
versions of the GNU General Public License can be used, that proxy's
|
programs whose distribution conditions are different, write to the author
|
||||||
public statement of acceptance of a version permanently authorizes you
|
to ask for permission. For software which is copyrighted by the Free
|
||||||
to choose that version for the Program.
|
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||||
|
make exceptions for this. Our decision will be guided by the two goals
|
||||||
|
of preserving the free status of all derivatives of our free software and
|
||||||
|
of promoting the sharing and reuse of software generally.
|
||||||
|
|
||||||
Later license versions may give you additional or different
|
NO WARRANTY
|
||||||
permissions. However, no additional obligations are imposed on any
|
|
||||||
author or copyright holder as a result of your choosing to follow a
|
|
||||||
later version.
|
|
||||||
|
|
||||||
15. Disclaimer of Warranty.
|
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||||
|
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||||
|
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||||
|
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||||
|
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||||
|
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||||
|
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||||
|
REPAIR OR CORRECTION.
|
||||||
|
|
||||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGES.
|
||||||
16. Limitation of Liability.
|
|
||||||
|
|
||||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
|
||||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
|
||||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
|
||||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
|
||||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
|
||||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
|
||||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
|
||||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
|
||||||
SUCH DAMAGES.
|
|
||||||
|
|
||||||
17. Interpretation of Sections 15 and 16.
|
|
||||||
|
|
||||||
If the disclaimer of warranty and limitation of liability provided
|
|
||||||
above cannot be given local legal effect according to their terms,
|
|
||||||
reviewing courts shall apply local law that most closely approximates
|
|
||||||
an absolute waiver of all civil liability in connection with the
|
|
||||||
Program, unless a warranty or assumption of liability accompanies a
|
|
||||||
copy of the Program in return for a fee.
|
|
||||||
|
|
||||||
END OF TERMS AND CONDITIONS
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
@ -628,15 +287,15 @@ free software which everyone can redistribute and change under these terms.
|
|||||||
|
|
||||||
To do so, attach the following notices to the program. It is safest
|
To do so, attach the following notices to the program. It is safest
|
||||||
to attach them to the start of each source file to most effectively
|
to attach them to the start of each source file to most effectively
|
||||||
state the exclusion of warranty; and each file should have at least
|
convey the exclusion of warranty; and each file should have at least
|
||||||
the "copyright" line and a pointer to where the full notice is found.
|
the "copyright" line and a pointer to where the full notice is found.
|
||||||
|
|
||||||
<one line to give the program's name and a brief idea of what it does.>
|
<one line to give the program's name and a brief idea of what it does.>
|
||||||
Copyright (C) <year> <name of author>
|
Copyright (C) <year> <name of author>
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
@ -644,31 +303,38 @@ the "copyright" line and a pointer to where the full notice is found.
|
|||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License along
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
Also add information on how to contact you by electronic and paper mail.
|
Also add information on how to contact you by electronic and paper mail.
|
||||||
|
|
||||||
If the program does terminal interaction, make it output a short
|
If the program is interactive, make it output a short notice like this
|
||||||
notice like this when it starts in an interactive mode:
|
when it starts in an interactive mode:
|
||||||
|
|
||||||
<program> Copyright (C) <year> <name of author>
|
Gnomovision version 69, Copyright (C) year name of author
|
||||||
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||||
This is free software, and you are welcome to redistribute it
|
This is free software, and you are welcome to redistribute it
|
||||||
under certain conditions; type `show c' for details.
|
under certain conditions; type `show c' for details.
|
||||||
|
|
||||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||||
parts of the General Public License. Of course, your program's commands
|
parts of the General Public License. Of course, the commands you use may
|
||||||
might be different; for a GUI interface, you would use an "about box".
|
be called something other than `show w' and `show c'; they could even be
|
||||||
|
mouse-clicks or menu items--whatever suits your program.
|
||||||
|
|
||||||
You should also get your employer (if you work as a programmer) or school,
|
You should also get your employer (if you work as a programmer) or your
|
||||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||||
For more information on this, and how to apply and follow the GNU GPL, see
|
necessary. Here is a sample; alter the names:
|
||||||
<http://www.gnu.org/licenses/>.
|
|
||||||
|
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||||
|
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||||
|
|
||||||
|
<signature of Ty Coon>, 1 April 1989
|
||||||
|
Ty Coon, President of Vice
|
||||||
|
|
||||||
|
This General Public License does not permit incorporating your program into
|
||||||
|
proprietary programs. If your program is a subroutine library, you may
|
||||||
|
consider it more useful to permit linking proprietary applications with the
|
||||||
|
library. If this is what you want to do, use the GNU Lesser General
|
||||||
|
Public License instead of this License.
|
||||||
|
|
||||||
The GNU General Public License does not permit incorporating your program
|
|
||||||
into proprietary programs. If your program is a subroutine library, you
|
|
||||||
may consider it more useful to permit linking proprietary applications with
|
|
||||||
the library. If this is what you want to do, use the GNU Lesser General
|
|
||||||
Public License instead of this License. But first, please read
|
|
||||||
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
|
|
||||||
|
10
LICENSE
10
LICENSE
@ -1,7 +1,7 @@
|
|||||||
GNU GENERAL PUBLIC LICENSE
|
GNU GENERAL PUBLIC LICENSE
|
||||||
Version 2, June 1991
|
Version 2, June 1991
|
||||||
|
|
||||||
Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
|
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
Everyone is permitted to copy and distribute verbatim copies
|
Everyone is permitted to copy and distribute verbatim copies
|
||||||
of this license document, but changing it is not allowed.
|
of this license document, but changing it is not allowed.
|
||||||
@ -290,8 +290,8 @@ to attach them to the start of each source file to most effectively
|
|||||||
convey the exclusion of warranty; and each file should have at least
|
convey the exclusion of warranty; and each file should have at least
|
||||||
the "copyright" line and a pointer to where the full notice is found.
|
the "copyright" line and a pointer to where the full notice is found.
|
||||||
|
|
||||||
{description}
|
<one line to give the program's name and a brief idea of what it does.>
|
||||||
Copyright (C) {year} {fullname}
|
Copyright (C) <year> <name of author>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -329,7 +329,7 @@ necessary. Here is a sample; alter the names:
|
|||||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||||
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||||
|
|
||||||
{signature of Ty Coon}, 1 April 1989
|
<signature of Ty Coon>, 1 April 1989
|
||||||
Ty Coon, President of Vice
|
Ty Coon, President of Vice
|
||||||
|
|
||||||
This General Public License does not permit incorporating your program into
|
This General Public License does not permit incorporating your program into
|
||||||
|
11
Makefile.am
11
Makefile.am
@ -1,5 +1,10 @@
|
|||||||
# additional include paths necessary to compile the C++ library
|
# additional include paths necessary to compile the C++ library
|
||||||
AM_CXXFLAGS = -I$(top_srcdir)/
|
SUBDIRS = lib benchmarks tests
|
||||||
SUBDIRS = lib tests benchmarks
|
|
||||||
|
|
||||||
filelist: $(SUBDIRS)
|
.PHONY: tests
|
||||||
|
|
||||||
|
tests: all
|
||||||
|
$(MAKE) -C tests tests
|
||||||
|
|
||||||
|
AM_CXXFLAGS += -I$(top_builddir)/include
|
||||||
|
ACLOCAL_AMFLAGS = -I m4
|
||||||
|
44
README
44
README
@ -1,44 +0,0 @@
|
|||||||
This library provides data parallel C++ container classes with internal memory layout
|
|
||||||
that is transformed to map efficiently to SIMD architectures. CSHIFT facilities
|
|
||||||
are provided, similar to HPF and cmfortran, and user control is given over the mapping of
|
|
||||||
array indices to both MPI tasks and SIMD processing elements.
|
|
||||||
|
|
||||||
* Identically shaped arrays then be processed with perfect data parallelisation.
|
|
||||||
* Such identically shapped arrays are called conformable arrays.
|
|
||||||
|
|
||||||
The transformation is based on the observation that Cartesian array processing involves
|
|
||||||
identical processing to be performed on different regions of the Cartesian array.
|
|
||||||
|
|
||||||
The library will (eventually) both geometrically decompose into MPI tasks and across SIMD lanes.
|
|
||||||
|
|
||||||
Data parallel array operations can then be specified with a SINGLE data parallel paradigm, but
|
|
||||||
optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a significant simplification
|
|
||||||
for most programmers.
|
|
||||||
|
|
||||||
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
|
|
||||||
Presently SSE2 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported.
|
|
||||||
|
|
||||||
These are presented as
|
|
||||||
|
|
||||||
vRealF, vRealD, vComplexF, vComplexD
|
|
||||||
|
|
||||||
internal vector data types. These may be useful in themselves for other programmers.
|
|
||||||
The corresponding scalar types are named
|
|
||||||
|
|
||||||
RealF, RealD, ComplexF, ComplexD
|
|
||||||
|
|
||||||
MPI parallelism is UNIMPLEMENTED and for now only OpenMP and SIMD parallelism is present in the library.
|
|
||||||
|
|
||||||
You can give `configure' initial values for configuration parameters
|
|
||||||
by setting variables in the command line or in the environment. Here
|
|
||||||
is are examples:
|
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4
|
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1
|
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2
|
|
||||||
|
|
||||||
./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none
|
|
||||||
|
|
||||||
|
|
183
README.md
183
README.md
@ -1,13 +1,51 @@
|
|||||||
# Grid
|
# Grid
|
||||||
Data parallel C++ mathematical object library
|
<table>
|
||||||
|
<tr>
|
||||||
|
<td>Last stable release</td>
|
||||||
|
<td><a href="https://travis-ci.org/paboyle/Grid">
|
||||||
|
<img src="https://travis-ci.org/paboyle/Grid.svg?branch=master"></a>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Development branch</td>
|
||||||
|
<td><a href="https://travis-ci.org/paboyle/Grid">
|
||||||
|
<img src="https://travis-ci.org/paboyle/Grid.svg?branch=develop"></a>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
**Data parallel C++ mathematical object library.**
|
||||||
|
|
||||||
|
License: GPL v2.
|
||||||
|
|
||||||
|
Last update Nov 2016.
|
||||||
|
|
||||||
|
_Please do not send pull requests to the `master` branch which is reserved for releases._
|
||||||
|
|
||||||
|
### Bug report
|
||||||
|
|
||||||
|
_To help us track and solve issues with Grid more efficiently, please report problems using the issue system of GitHub rather than sending emails to Grid developers._
|
||||||
|
|
||||||
|
When you file an issue, please go through the following checklist:
|
||||||
|
|
||||||
|
1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.
|
||||||
|
2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output of the `--version` option of your compiler.
|
||||||
|
3. Give the exact `configure` command used.
|
||||||
|
4. Attach `config.log`.
|
||||||
|
5. Attach `config.summary`.
|
||||||
|
6. Attach the output of `make V=1`.
|
||||||
|
7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Description
|
||||||
This library provides data parallel C++ container classes with internal memory layout
|
This library provides data parallel C++ container classes with internal memory layout
|
||||||
that is transformed to map efficiently to SIMD architectures. CSHIFT facilities
|
that is transformed to map efficiently to SIMD architectures. CSHIFT facilities
|
||||||
are provided, similar to HPF and cmfortran, and user control is given over the mapping of
|
are provided, similar to HPF and cmfortran, and user control is given over the mapping of
|
||||||
array indices to both MPI tasks and SIMD processing elements.
|
array indices to both MPI tasks and SIMD processing elements.
|
||||||
|
|
||||||
* Identically shaped arrays then be processed with perfect data parallelisation.
|
* Identically shaped arrays then be processed with perfect data parallelisation.
|
||||||
* Such identically shapped arrays are called conformable arrays.
|
* Such identically shaped arrays are called conformable arrays.
|
||||||
|
|
||||||
The transformation is based on the observation that Cartesian array processing involves
|
The transformation is based on the observation that Cartesian array processing involves
|
||||||
identical processing to be performed on different regions of the Cartesian array.
|
identical processing to be performed on different regions of the Cartesian array.
|
||||||
@ -20,31 +58,136 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi
|
|||||||
for most programmers.
|
for most programmers.
|
||||||
|
|
||||||
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
|
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
|
||||||
Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported.
|
Presently SSE4 (128 bit) AVX, AVX2, QPX (256 bit), IMCI, and AVX512 (512 bit) targets are supported (ARM NEON on the way).
|
||||||
|
|
||||||
These are presented as
|
These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers.
|
||||||
|
The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`.
|
||||||
vRealF, vRealD, vComplexF, vComplexD
|
|
||||||
|
|
||||||
internal vector data types. These may be useful in themselves for other programmers.
|
|
||||||
The corresponding scalar types are named
|
|
||||||
|
|
||||||
RealF, RealD, ComplexF, ComplexD
|
|
||||||
|
|
||||||
MPI, OpenMP, and SIMD parallelism are present in the library.
|
MPI, OpenMP, and SIMD parallelism are present in the library.
|
||||||
|
Please see https://arxiv.org/abs/1512.03487 for more detail.
|
||||||
|
|
||||||
You can give `configure' initial values for configuration parameters
|
### Quick start
|
||||||
by setting variables in the command line or in the environment. Here
|
First, start by cloning the repository:
|
||||||
are examples:
|
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4
|
``` bash
|
||||||
|
git clone https://github.com/paboyle/Grid.git
|
||||||
|
```
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1
|
Then enter the cloned directory and set up the build system:
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2
|
``` bash
|
||||||
|
cd Grid
|
||||||
|
./bootstrap.sh
|
||||||
|
```
|
||||||
|
|
||||||
./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none
|
Now you can execute the `configure` script to generate makefiles (here from a build directory):
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
mkdir build; cd build
|
||||||
|
../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
|
||||||
|
```
|
||||||
|
|
||||||
For developers:
|
where `--enable-precision=` sets the default precision,
|
||||||
Use reconfigure_script in the scripts/ directory to create the autotools environment
|
`--enable-simd=` sets the SIMD type, `--enable-
|
||||||
|
comms=` sets the communication interface, and `<path>` should be replaced by the prefix path where you want to
|
||||||
|
install Grid. Other options are detailed in the next section, you can also use `configure
|
||||||
|
--help` to display them. Like with any other program using GNU autotools, the
|
||||||
|
`CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to
|
||||||
|
customise the build.
|
||||||
|
|
||||||
|
Finally, you can build and install Grid:
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
make; make install
|
||||||
|
```
|
||||||
|
|
||||||
|
To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute:
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
make -C tests/<subdir> tests
|
||||||
|
```
|
||||||
|
If you want to build all the tests at once just use `make tests`.
|
||||||
|
|
||||||
|
### Build configuration options
|
||||||
|
|
||||||
|
- `--prefix=<path>`: installation prefix for Grid.
|
||||||
|
- `--with-gmp=<path>`: look for GMP in the UNIX prefix `<path>`
|
||||||
|
- `--with-mpfr=<path>`: look for MPFR in the UNIX prefix `<path>`
|
||||||
|
- `--with-fftw=<path>`: look for FFTW in the UNIX prefix `<path>`
|
||||||
|
- `--enable-lapack[=<path>]`: enable LAPACK support in Lanczos eigensolver. A UNIX prefix containing the library can be specified (optional).
|
||||||
|
- `--enable-mkl[=<path>]`: use Intel MKL for FFT (and LAPACK if enabled) routines. A UNIX prefix containing the library can be specified (optional).
|
||||||
|
- `--enable-numa`: ???
|
||||||
|
- `--enable-simd=<code>`: setup Grid for the SIMD target `<code>` (default: `GEN`). A list of possible SIMD targets is detailed in a section below.
|
||||||
|
- `--enable-precision={single|double}`: set the default precision (default: `double`).
|
||||||
|
- `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible communication interfaces is detailed in a section below.
|
||||||
|
- `--enable-rng={ranlux48|mt19937}`: choose the RNG (default: `ranlux48`).
|
||||||
|
- `--disable-timers`: disable system dependent high-resolution timers.
|
||||||
|
- `--enable-chroma`: enable Chroma regression tests.
|
||||||
|
|
||||||
|
### Possible communication interfaces
|
||||||
|
|
||||||
|
The following options can be used with the `--enable-comms=` option to target different communication interfaces:
|
||||||
|
|
||||||
|
| `<comm>` | Description |
|
||||||
|
| -------------- | ------------------------------------------------------------- |
|
||||||
|
| `none` | no communications |
|
||||||
|
| `mpi[-auto]` | MPI communications |
|
||||||
|
| `mpi3[-auto]` | MPI communications using MPI 3 shared memory |
|
||||||
|
| `mpi3l[-auto]` | MPI communications using MPI 3 shared memory and leader model |
|
||||||
|
| `shmem ` | Cray SHMEM communications |
|
||||||
|
|
||||||
|
For the MPI interfaces the optional `-auto` suffix instructs the `configure` scripts to determine all the necessary compilation and linking flags. This is done by extracting the information from the MPI wrapper specified in the environment variable `MPICXX` (if not specified `configure` will scan through a list of default names).
|
||||||
|
|
||||||
|
### Possible SIMD types
|
||||||
|
|
||||||
|
The following options can be used with the `--enable-simd=` option to target different SIMD instruction sets:
|
||||||
|
|
||||||
|
| `<code>` | Description |
|
||||||
|
| ----------- | -------------------------------------- |
|
||||||
|
| `GEN` | generic portable vector code |
|
||||||
|
| `SSE4` | SSE 4.2 (128 bit) |
|
||||||
|
| `AVX` | AVX (256 bit) |
|
||||||
|
| `AVXFMA` | AVX (256 bit) + FMA |
|
||||||
|
| `AVXFMA4` | AVX (256 bit) + FMA4 |
|
||||||
|
| `AVX2` | AVX 2 (256 bit) |
|
||||||
|
| `AVX512` | AVX 512 bit |
|
||||||
|
| `QPX` | QPX (256 bit) |
|
||||||
|
|
||||||
|
Alternatively, some CPU codenames can be directly used:
|
||||||
|
|
||||||
|
| `<code>` | Description |
|
||||||
|
| ----------- | -------------------------------------- |
|
||||||
|
| `KNC` | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) |
|
||||||
|
| `KNL` | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) |
|
||||||
|
| `BGQ` | Blue Gene/Q |
|
||||||
|
|
||||||
|
#### Notes:
|
||||||
|
- We currently support AVX512 only for the Intel compiler. Support for GCC and clang will appear in future versions of Grid when the AVX512 support within GCC and clang is more advanced.
|
||||||
|
- For BG/Q only [bgclang](http://trac.alcf.anl.gov/projects/llvm-bgq) is supported. We do not presently plan to support more compilers for this platform.
|
||||||
|
- BG/Q performance is currently rather poor. This is being investigated for future versions.
|
||||||
|
|
||||||
|
### Build setup for Intel Knights Landing platform
|
||||||
|
|
||||||
|
The following configuration is recommended for the Intel Knights Landing platform:
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
../configure --enable-precision=double\
|
||||||
|
--enable-simd=KNL \
|
||||||
|
--enable-comms=mpi-auto \
|
||||||
|
--with-gmp=<path> \
|
||||||
|
--with-mpfr=<path> \
|
||||||
|
--enable-mkl \
|
||||||
|
CXX=icpc MPICXX=mpiicpc
|
||||||
|
```
|
||||||
|
|
||||||
|
where `<path>` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
../configure --enable-precision=double\
|
||||||
|
--enable-simd=KNL \
|
||||||
|
--enable-comms=mpi \
|
||||||
|
--with-gmp=<path> \
|
||||||
|
--with-mpfr=<path> \
|
||||||
|
--enable-mkl \
|
||||||
|
CXX=CC CC=cc
|
||||||
|
```
|
22
TODO
22
TODO
@ -1,5 +1,27 @@
|
|||||||
TODO:
|
TODO:
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
|
* Forces; the UdSdU term in gauge force term is half of what I think it should
|
||||||
|
be. This is a consequence of taking ONLY the first term in:
|
||||||
|
|
||||||
|
dSg/dt = dU/dt dSdU + dUdag/dt dSdUdag
|
||||||
|
|
||||||
|
in the fermion force.
|
||||||
|
|
||||||
|
Now, S_mom = - tr Pmu Pmu ; Pmu anti-herm
|
||||||
|
|
||||||
|
.
|
||||||
|
d Smom/dt = - 2.0 tr Pmu Pmu = - dSg/dt = - tr Pmu [Umu dSdUmu + UmuDag dSdUmuDag]
|
||||||
|
|
||||||
|
.
|
||||||
|
=> Pmu = Umu dSdUmu
|
||||||
|
|
||||||
|
Where the norm is half expected.
|
||||||
|
|
||||||
|
This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two.
|
||||||
|
This 2x is applied by hand in the fermion routines and in the Test_rect_force routine.
|
||||||
|
|
||||||
|
|
||||||
Policies:
|
Policies:
|
||||||
|
|
||||||
* Link smearing/boundary conds; Policy class based implementation ; framework more in place
|
* Link smearing/boundary conds; Policy class based implementation ; framework more in place
|
||||||
|
6
VERSION
Normal file
6
VERSION
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
Version : 0.6.0
|
||||||
|
|
||||||
|
- AVX512, AVX2, AVX, SSE good
|
||||||
|
- Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above
|
||||||
|
- MPI and MPI3
|
||||||
|
- HiRep, Smearing, Generic gauge group
|
@ -1,4 +1,31 @@
|
|||||||
#include <Grid.h>
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./benchmarks/Benchmark_comms.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
@ -15,15 +42,14 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
int Nloop=10;
|
int Nloop=10;
|
||||||
int nmu=0;
|
int nmu=0;
|
||||||
for(int mu=0;mu<4;mu++) if (mpi_layout[mu]>1) nmu++;
|
for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++;
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
||||||
|
int maxlat=16;
|
||||||
|
for(int lat=4;lat<=maxlat;lat+=2){
|
||||||
|
|
||||||
for(int lat=4;lat<=32;lat+=2){
|
|
||||||
for(int Ls=1;Ls<=16;Ls*=2){
|
for(int Ls=1;Ls<=16;Ls*=2){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat*mpi_layout[0],
|
std::vector<int> latt_size ({lat*mpi_layout[0],
|
||||||
@ -98,7 +124,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
||||||
|
|
||||||
|
|
||||||
for(int lat=4;lat<=32;lat+=2){
|
for(int lat=4;lat<=maxlat;lat+=2){
|
||||||
for(int Ls=1;Ls<=16;Ls*=2){
|
for(int Ls=1;Ls<=16;Ls*=2){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||||
@ -168,6 +194,168 @@ int main (int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Nloop=100;
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
||||||
|
|
||||||
|
for(int lat=4;lat<=maxlat;lat+=2){
|
||||||
|
for(int Ls=1;Ls<=16;Ls*=2){
|
||||||
|
|
||||||
|
std::vector<int> latt_size ({lat*mpi_layout[0],
|
||||||
|
lat*mpi_layout[1],
|
||||||
|
lat*mpi_layout[2],
|
||||||
|
lat*mpi_layout[3]});
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
|
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||||
|
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||||
|
Grid.ShmBufferFreeAll();
|
||||||
|
for(int d=0;d<8;d++){
|
||||||
|
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||||
|
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||||
|
}
|
||||||
|
|
||||||
|
int ncomm;
|
||||||
|
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||||
|
|
||||||
|
double start=usecond();
|
||||||
|
for(int i=0;i<Nloop;i++){
|
||||||
|
|
||||||
|
std::vector<CartesianCommunicator::CommsRequest_t> requests;
|
||||||
|
|
||||||
|
ncomm=0;
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
|
||||||
|
if (mpi_layout[mu]>1 ) {
|
||||||
|
|
||||||
|
ncomm++;
|
||||||
|
int comm_proc=1;
|
||||||
|
int xmit_to_rank;
|
||||||
|
int recv_from_rank;
|
||||||
|
|
||||||
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
Grid.StencilSendToRecvFromBegin(requests,
|
||||||
|
(void *)&xbuf[mu][0],
|
||||||
|
xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu][0],
|
||||||
|
recv_from_rank,
|
||||||
|
bytes);
|
||||||
|
|
||||||
|
comm_proc = mpi_layout[mu]-1;
|
||||||
|
|
||||||
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
Grid.StencilSendToRecvFromBegin(requests,
|
||||||
|
(void *)&xbuf[mu+4][0],
|
||||||
|
xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu+4][0],
|
||||||
|
recv_from_rank,
|
||||||
|
bytes);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Grid.StencilSendToRecvFromComplete(requests);
|
||||||
|
Grid.Barrier();
|
||||||
|
|
||||||
|
}
|
||||||
|
double stop=usecond();
|
||||||
|
|
||||||
|
double dbytes = bytes;
|
||||||
|
double xbytes = Nloop*dbytes*2.0*ncomm;
|
||||||
|
double rbytes = xbytes;
|
||||||
|
double bidibytes = xbytes+rbytes;
|
||||||
|
|
||||||
|
double time = stop-start; // microseconds
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Nloop=100;
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
||||||
|
|
||||||
|
for(int lat=4;lat<=maxlat;lat+=2){
|
||||||
|
for(int Ls=1;Ls<=16;Ls*=2){
|
||||||
|
|
||||||
|
std::vector<int> latt_size ({lat*mpi_layout[0],
|
||||||
|
lat*mpi_layout[1],
|
||||||
|
lat*mpi_layout[2],
|
||||||
|
lat*mpi_layout[3]});
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
|
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||||
|
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||||
|
Grid.ShmBufferFreeAll();
|
||||||
|
for(int d=0;d<8;d++){
|
||||||
|
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||||
|
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||||
|
}
|
||||||
|
|
||||||
|
int ncomm;
|
||||||
|
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||||
|
|
||||||
|
double start=usecond();
|
||||||
|
for(int i=0;i<Nloop;i++){
|
||||||
|
|
||||||
|
std::vector<CartesianCommunicator::CommsRequest_t> requests;
|
||||||
|
|
||||||
|
ncomm=0;
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
|
||||||
|
if (mpi_layout[mu]>1 ) {
|
||||||
|
|
||||||
|
ncomm++;
|
||||||
|
int comm_proc=1;
|
||||||
|
int xmit_to_rank;
|
||||||
|
int recv_from_rank;
|
||||||
|
|
||||||
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
Grid.StencilSendToRecvFromBegin(requests,
|
||||||
|
(void *)&xbuf[mu][0],
|
||||||
|
xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu][0],
|
||||||
|
recv_from_rank,
|
||||||
|
bytes);
|
||||||
|
// Grid.StencilSendToRecvFromComplete(requests);
|
||||||
|
// requests.resize(0);
|
||||||
|
|
||||||
|
comm_proc = mpi_layout[mu]-1;
|
||||||
|
|
||||||
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
Grid.StencilSendToRecvFromBegin(requests,
|
||||||
|
(void *)&xbuf[mu+4][0],
|
||||||
|
xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu+4][0],
|
||||||
|
recv_from_rank,
|
||||||
|
bytes);
|
||||||
|
Grid.StencilSendToRecvFromComplete(requests);
|
||||||
|
requests.resize(0);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Grid.Barrier();
|
||||||
|
|
||||||
|
}
|
||||||
|
double stop=usecond();
|
||||||
|
|
||||||
|
double dbytes = bytes;
|
||||||
|
double xbytes = Nloop*dbytes*2.0*ncomm;
|
||||||
|
double rbytes = xbytes;
|
||||||
|
double bidibytes = xbytes+rbytes;
|
||||||
|
|
||||||
|
double time = stop-start; // microseconds
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,32 @@
|
|||||||
#include <Grid.h>
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./benchmarks/Benchmark_dwf.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
@ -16,6 +44,11 @@ struct scal {
|
|||||||
Gamma::GammaT
|
Gamma::GammaT
|
||||||
};
|
};
|
||||||
|
|
||||||
|
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
|
||||||
|
typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF;
|
||||||
|
typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD;
|
||||||
|
|
||||||
|
|
||||||
int main (int argc, char ** argv)
|
int main (int argc, char ** argv)
|
||||||
{
|
{
|
||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
@ -24,12 +57,18 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
|
||||||
std::vector<int> latt4 = GridDefaultLatt();
|
std::vector<int> latt4 = GridDefaultLatt();
|
||||||
const int Ls=16;
|
const int Ls=8;
|
||||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
|
||||||
|
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
||||||
|
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
std::vector<int> seeds4({1,2,3,4});
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
std::vector<int> seeds5({5,6,7,8});
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
|
||||||
@ -42,9 +81,9 @@ int main (int argc, char ** argv)
|
|||||||
LatticeFermion tmp(FGrid);
|
LatticeFermion tmp(FGrid);
|
||||||
LatticeFermion err(FGrid);
|
LatticeFermion err(FGrid);
|
||||||
|
|
||||||
ColourMatrix cm = Complex(1.0,0.0);
|
LatticeGaugeField Umu(UGrid);
|
||||||
|
random(RNG4,Umu);
|
||||||
|
|
||||||
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
|
|
||||||
LatticeGaugeField Umu5d(FGrid);
|
LatticeGaugeField Umu5d(FGrid);
|
||||||
|
|
||||||
// replicate across fifth dimension
|
// replicate across fifth dimension
|
||||||
@ -79,16 +118,36 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
RealD mass=0.1;
|
RealD mass=0.1;
|
||||||
RealD M5 =1.8;
|
RealD M5 =1.8;
|
||||||
|
|
||||||
|
RealD NP = UGrid->_Nprocessors;
|
||||||
|
|
||||||
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
int ncall=10000;
|
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
|
||||||
{
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::Dhop "<<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
|
||||||
|
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||||
|
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
|
||||||
|
int ncall =100;
|
||||||
|
if (1) {
|
||||||
|
FGrid->Barrier();
|
||||||
|
Dw.ZeroCounters();
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
|
__SSC_START;
|
||||||
Dw.Dhop(src,result,0);
|
Dw.Dhop(src,result,0);
|
||||||
|
__SSC_STOP;
|
||||||
}
|
}
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
|
FGrid->Barrier();
|
||||||
|
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double flops=1344*volume*ncall;
|
double flops=1344*volume*ncall;
|
||||||
@ -97,11 +156,165 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
err = ref-result;
|
err = ref-result;
|
||||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||||
|
assert (norm2(err)< 1.0e-4 );
|
||||||
Dw.Report();
|
Dw.Report();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (1)
|
||||||
|
{
|
||||||
|
|
||||||
|
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Benchmarking WilsonFermion5D<DomainWallVec5dImplR>::Dhop "<<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
|
||||||
|
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||||
|
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||||
|
|
||||||
|
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
|
||||||
|
LatticeFermion ssrc(sFGrid);
|
||||||
|
LatticeFermion sref(sFGrid);
|
||||||
|
LatticeFermion sresult(sFGrid);
|
||||||
|
|
||||||
|
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
|
||||||
|
|
||||||
|
for(int x=0;x<latt4[0];x++){
|
||||||
|
for(int y=0;y<latt4[1];y++){
|
||||||
|
for(int z=0;z<latt4[2];z++){
|
||||||
|
for(int t=0;t<latt4[3];t++){
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
std::vector<int> site({s,x,y,z,t});
|
||||||
|
SpinColourVector tmp;
|
||||||
|
peekSite(tmp,src,site);
|
||||||
|
pokeSite(tmp,ssrc,site);
|
||||||
|
}}}}}
|
||||||
|
std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
|
||||||
|
FGrid->Barrier();
|
||||||
|
double t0=usecond();
|
||||||
|
sDw.ZeroCounters();
|
||||||
|
for(int i=0;i<ncall;i++){
|
||||||
|
__SSC_START;
|
||||||
|
sDw.Dhop(ssrc,sresult,0);
|
||||||
|
__SSC_STOP;
|
||||||
|
}
|
||||||
|
double t1=usecond();
|
||||||
|
FGrid->Barrier();
|
||||||
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
|
double flops=1344*volume*ncall;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
|
sDw.Report();
|
||||||
|
|
||||||
|
if(0){
|
||||||
|
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
|
||||||
|
sDw.Dhop(ssrc,sresult,0);
|
||||||
|
PerformanceCounter Counter(i);
|
||||||
|
Counter.Start();
|
||||||
|
sDw.Dhop(ssrc,sresult,0);
|
||||||
|
Counter.Stop();
|
||||||
|
Counter.Report();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
|
||||||
|
|
||||||
|
RealD sum=0;
|
||||||
|
for(int x=0;x<latt4[0];x++){
|
||||||
|
for(int y=0;y<latt4[1];y++){
|
||||||
|
for(int z=0;z<latt4[2];z++){
|
||||||
|
for(int t=0;t<latt4[3];t++){
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
std::vector<int> site({s,x,y,z,t});
|
||||||
|
SpinColourVector normal, simd;
|
||||||
|
peekSite(normal,result,site);
|
||||||
|
peekSite(simd,sresult,site);
|
||||||
|
sum=sum+norm2(normal-simd);
|
||||||
|
if (norm2(normal-simd) > 1.0e-6 ) {
|
||||||
|
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
|
||||||
|
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl;
|
||||||
|
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd "<<simd<<std::endl;
|
||||||
|
}
|
||||||
|
}}}}}
|
||||||
|
std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl;
|
||||||
|
assert (sum< 1.0e-4 );
|
||||||
|
|
||||||
|
|
||||||
|
if (1) {
|
||||||
|
|
||||||
|
LatticeFermion sr_eo(sFGrid);
|
||||||
|
|
||||||
|
LatticeFermion ssrc_e (sFrbGrid);
|
||||||
|
LatticeFermion ssrc_o (sFrbGrid);
|
||||||
|
LatticeFermion sr_e (sFrbGrid);
|
||||||
|
LatticeFermion sr_o (sFrbGrid);
|
||||||
|
|
||||||
|
pickCheckerboard(Even,ssrc_e,ssrc);
|
||||||
|
pickCheckerboard(Odd,ssrc_o,ssrc);
|
||||||
|
|
||||||
|
setCheckerboard(sr_eo,ssrc_o);
|
||||||
|
setCheckerboard(sr_eo,ssrc_e);
|
||||||
|
|
||||||
|
sr_e = zero;
|
||||||
|
sr_o = zero;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Benchmarking WilsonFermion5D<DomainWallVec5dImplR>::DhopEO "<<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
|
||||||
|
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||||
|
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||||
|
|
||||||
|
FGrid->Barrier();
|
||||||
|
sDw.ZeroCounters();
|
||||||
|
sDw.stat.init("DhopEO");
|
||||||
|
double t0=usecond();
|
||||||
|
for (int i = 0; i < ncall; i++) {
|
||||||
|
sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
|
||||||
|
}
|
||||||
|
double t1=usecond();
|
||||||
|
FGrid->Barrier();
|
||||||
|
sDw.stat.print();
|
||||||
|
|
||||||
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
|
double flops=(1344.0*volume*ncall)/2;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "sDeo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
|
sDw.Report();
|
||||||
|
|
||||||
|
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
||||||
|
sDw.DhopOE(ssrc_e,sr_o,DaggerNo);
|
||||||
|
sDw.Dhop (ssrc ,sresult,DaggerNo);
|
||||||
|
|
||||||
|
pickCheckerboard(Even,ssrc_e,sresult);
|
||||||
|
pickCheckerboard(Odd ,ssrc_o,sresult);
|
||||||
|
ssrc_e = ssrc_e - sr_e;
|
||||||
|
RealD error = norm2(ssrc_e);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<< " vec nrm"<<norm2(sr_e) <<std::endl;
|
||||||
|
ssrc_o = ssrc_o - sr_o;
|
||||||
|
|
||||||
|
error+= norm2(ssrc_o);
|
||||||
|
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<< " vec nrm"<<norm2(sr_o) <<std::endl;
|
||||||
|
if(error>1.0e-4) {
|
||||||
|
setCheckerboard(ssrc,ssrc_o);
|
||||||
|
setCheckerboard(ssrc,ssrc_e);
|
||||||
|
std::cout<< ssrc << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
if (1)
|
if (1)
|
||||||
{ // Naive wilson dag implementation
|
{ // Naive wilson dag implementation
|
||||||
@ -111,24 +324,25 @@ int main (int argc, char ** argv)
|
|||||||
// ref = src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
|
// ref = src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
|
||||||
tmp = U[mu]*Cshift(src,mu+1,1);
|
tmp = U[mu]*Cshift(src,mu+1,1);
|
||||||
for(int i=0;i<ref._odata.size();i++){
|
for(int i=0;i<ref._odata.size();i++){
|
||||||
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
|
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
|
||||||
}
|
}
|
||||||
|
|
||||||
tmp =adj(U[mu])*src;
|
tmp =adj(U[mu])*src;
|
||||||
tmp =Cshift(tmp,mu+1,-1);
|
tmp =Cshift(tmp,mu+1,-1);
|
||||||
for(int i=0;i<ref._odata.size();i++){
|
for(int i=0;i<ref._odata.size();i++){
|
||||||
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
|
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ref = -0.5*ref;
|
ref = -0.5*ref;
|
||||||
}
|
}
|
||||||
Dw.Dhop(src,result,1);
|
Dw.Dhop(src,result,1);
|
||||||
|
std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl;
|
||||||
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
|
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||||
err = ref-result;
|
err = ref-result;
|
||||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||||
|
assert(norm2(err)<1.0e-4);
|
||||||
LatticeFermion src_e (FrbGrid);
|
LatticeFermion src_e (FrbGrid);
|
||||||
LatticeFermion src_o (FrbGrid);
|
LatticeFermion src_o (FrbGrid);
|
||||||
LatticeFermion r_e (FrbGrid);
|
LatticeFermion r_e (FrbGrid);
|
||||||
@ -136,26 +350,39 @@ int main (int argc, char ** argv)
|
|||||||
LatticeFermion r_eo (FGrid);
|
LatticeFermion r_eo (FGrid);
|
||||||
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Calling Deo and Doe"<<std::endl;
|
std::cout<<GridLogMessage << "Calling Deo and Doe and assert Deo+Doe == Dunprec"<<std::endl;
|
||||||
pickCheckerboard(Even,src_e,src);
|
pickCheckerboard(Even,src_e,src);
|
||||||
pickCheckerboard(Odd,src_o,src);
|
pickCheckerboard(Odd,src_o,src);
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "src_e"<<norm2(src_e)<<std::endl;
|
std::cout<<GridLogMessage << "src_e"<<norm2(src_e)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
|
std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO "<<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
|
||||||
|
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||||
|
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||||
{
|
{
|
||||||
|
Dw.ZeroCounters();
|
||||||
|
FGrid->Barrier();
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||||
}
|
}
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
|
FGrid->Barrier();
|
||||||
|
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double flops=(1344.0*volume*ncall)/2;
|
double flops=(1344.0*volume*ncall)/2;
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
|
Dw.Report();
|
||||||
}
|
}
|
||||||
|
|
||||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||||
Dw.DhopOE(src_e,r_o,DaggerNo);
|
Dw.DhopOE(src_e,r_o,DaggerNo);
|
||||||
Dw.Dhop (src ,result,DaggerNo);
|
Dw.Dhop (src ,result,DaggerNo);
|
||||||
@ -169,11 +396,14 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
err = r_eo-result;
|
err = r_eo-result;
|
||||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||||
|
assert(norm2(err)<1.0e-4);
|
||||||
|
|
||||||
pickCheckerboard(Even,src_e,err);
|
pickCheckerboard(Even,src_e,err);
|
||||||
pickCheckerboard(Odd,src_o,err);
|
pickCheckerboard(Odd,src_o,err);
|
||||||
std::cout<<GridLogMessage << "norm diff even "<< norm2(src_e)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff even "<< norm2(src_e)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "norm diff odd "<< norm2(src_o)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff odd "<< norm2(src_o)<<std::endl;
|
||||||
|
assert(norm2(src_e)<1.0e-4);
|
||||||
|
assert(norm2(src_o)<1.0e-4);
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
366
benchmarks/Benchmark_dwf_sweep.cc
Normal file
366
benchmarks/Benchmark_dwf_sweep.cc
Normal file
@ -0,0 +1,366 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./benchmarks/Benchmark_dwf.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
using namespace Grid::QCD;
|
||||||
|
|
||||||
|
template<class d>
|
||||||
|
struct scal {
|
||||||
|
d internal;
|
||||||
|
};
|
||||||
|
|
||||||
|
Gamma::GammaMatrix Gmu [] = {
|
||||||
|
Gamma::GammaX,
|
||||||
|
Gamma::GammaY,
|
||||||
|
Gamma::GammaZ,
|
||||||
|
Gamma::GammaT
|
||||||
|
};
|
||||||
|
|
||||||
|
void benchDw(std::vector<int> & L, int Ls, int threads, int report =0 );
|
||||||
|
void benchsDw(std::vector<int> & L, int Ls, int threads, int report=0 );
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
|
||||||
|
const int Ls=8;
|
||||||
|
int threads = GridThread::GetThreads();
|
||||||
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking DWF"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Volume \t\t\tProcs \t Dw \t eoDw \t sDw \t eosDw (Mflop/s) "<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
||||||
|
|
||||||
|
int Lmax=16;
|
||||||
|
int dmin=2;
|
||||||
|
if ( getenv("LMAX") ) Lmax=atoi(getenv("LMAX"));
|
||||||
|
if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN"));
|
||||||
|
for (int L=8;L<=Lmax;L*=2){
|
||||||
|
std::vector<int> latt4(4,L);
|
||||||
|
for(int d=4;d>dmin;d--){
|
||||||
|
if ( d<=3 ) latt4[d]*=2;
|
||||||
|
std::cout << GridLogMessage <<"\t";
|
||||||
|
for(int d=0;d<Nd;d++){
|
||||||
|
std::cout<<latt4[d]<<"x";
|
||||||
|
}
|
||||||
|
std::cout <<Ls<<"\t" ;
|
||||||
|
benchDw (latt4,Ls,threads,0);
|
||||||
|
benchsDw(latt4,Ls,threads,0);
|
||||||
|
std::cout<<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
||||||
|
{
|
||||||
|
std::vector<int> latt4(4,16);
|
||||||
|
std::cout<<GridLogMessage << "16^4 Dw miss rate"<<std::endl;
|
||||||
|
benchDw (latt4,Ls,threads,1);
|
||||||
|
std::cout<<GridLogMessage << "16^4 sDw miss rate"<<std::endl;
|
||||||
|
benchsDw(latt4,Ls,threads,1);
|
||||||
|
}
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef CHECK
|
||||||
|
|
||||||
|
void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
|
||||||
|
{
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
|
||||||
|
#ifdef CHECK
|
||||||
|
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
LatticeFermion src (FGrid); random(RNG5,src);
|
||||||
|
LatticeGaugeField Umu(UGrid);
|
||||||
|
random(RNG4,Umu);
|
||||||
|
#else
|
||||||
|
LatticeFermion src (FGrid); src=zero;
|
||||||
|
LatticeGaugeField Umu(UGrid); Umu=zero;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
LatticeFermion result(FGrid); result=zero;
|
||||||
|
LatticeFermion ref(FGrid); ref=zero;
|
||||||
|
LatticeFermion tmp(FGrid);
|
||||||
|
LatticeFermion err(FGrid);
|
||||||
|
|
||||||
|
ColourMatrix cm = Complex(1.0,0.0);
|
||||||
|
|
||||||
|
LatticeGaugeField Umu5d(FGrid);
|
||||||
|
|
||||||
|
// replicate across fifth dimension
|
||||||
|
for(int ss=0;ss<Umu._grid->oSites();ss++){
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
Umu5d._odata[Ls*ss+s] = Umu._odata[ss];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Naive wilson implementation
|
||||||
|
////////////////////////////////////
|
||||||
|
std::vector<LatticeColourMatrix> U(4,FGrid);
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CHECK
|
||||||
|
if (1) {
|
||||||
|
|
||||||
|
ref = zero;
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
tmp = U[mu]*Cshift(src,mu+1,1);
|
||||||
|
ref=ref + tmp - Gamma(Gmu[mu])*tmp;
|
||||||
|
|
||||||
|
tmp =adj(U[mu])*src;
|
||||||
|
tmp =Cshift(tmp,mu+1,-1);
|
||||||
|
ref=ref + tmp + Gamma(Gmu[mu])*tmp;
|
||||||
|
}
|
||||||
|
ref = -0.5*ref;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
RealD mass=0.1;
|
||||||
|
RealD M5 =1.8;
|
||||||
|
RealD NP = UGrid->_Nprocessors;
|
||||||
|
|
||||||
|
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
|
||||||
|
double t0=usecond();
|
||||||
|
Dw.Dhop(src,result,0);
|
||||||
|
double t1=usecond();
|
||||||
|
|
||||||
|
#ifdef TIMERS_OFF
|
||||||
|
int ncall =10;
|
||||||
|
#else
|
||||||
|
int ncall =1+(int) ((5.0*1000*1000)/(t1-t0));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (ncall < 5 ) exit(0);
|
||||||
|
|
||||||
|
Dw.Dhop(src,result,0);
|
||||||
|
|
||||||
|
PerformanceCounter Counter(8);
|
||||||
|
Counter.Start();
|
||||||
|
t0=usecond();
|
||||||
|
for(int i=0;i<ncall;i++){
|
||||||
|
Dw.Dhop(src,result,0);
|
||||||
|
}
|
||||||
|
t1=usecond();
|
||||||
|
Counter.Stop();
|
||||||
|
if ( report ) {
|
||||||
|
Counter.Report();
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! report ) {
|
||||||
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
|
double flops=1344*volume*ncall;
|
||||||
|
std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t";
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CHECK
|
||||||
|
err = ref-result;
|
||||||
|
RealD errd = norm2(err);
|
||||||
|
if ( errd> 1.0e-4 ) {
|
||||||
|
std::cout<<GridLogMessage << "oops !!! norm diff "<< norm2(err)<<std::endl;
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
LatticeFermion src_e (FrbGrid);
|
||||||
|
LatticeFermion src_o (FrbGrid);
|
||||||
|
LatticeFermion r_e (FrbGrid);
|
||||||
|
LatticeFermion r_o (FrbGrid);
|
||||||
|
LatticeFermion r_eo (FGrid);
|
||||||
|
|
||||||
|
pickCheckerboard(Even,src_e,src);
|
||||||
|
pickCheckerboard(Odd,src_o,src);
|
||||||
|
|
||||||
|
{
|
||||||
|
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||||
|
double t0=usecond();
|
||||||
|
for(int i=0;i<ncall;i++){
|
||||||
|
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||||
|
}
|
||||||
|
double t1=usecond();
|
||||||
|
|
||||||
|
if(!report){
|
||||||
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
|
double flops=(1344.0*volume*ncall)/2;
|
||||||
|
std::cout<< flops/(t1-t0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#define CHECK_SDW
|
||||||
|
void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
|
||||||
|
{
|
||||||
|
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
|
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
||||||
|
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
|
||||||
|
#ifdef CHECK_SDW
|
||||||
|
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
LatticeFermion src (FGrid); random(RNG5,src);
|
||||||
|
LatticeGaugeField Umu(UGrid);
|
||||||
|
random(RNG4,Umu);
|
||||||
|
#else
|
||||||
|
LatticeFermion src (FGrid); src=zero;
|
||||||
|
LatticeGaugeField Umu(UGrid); Umu=zero;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
LatticeFermion result(FGrid); result=zero;
|
||||||
|
LatticeFermion ref(FGrid); ref=zero;
|
||||||
|
LatticeFermion tmp(FGrid);
|
||||||
|
LatticeFermion err(FGrid);
|
||||||
|
|
||||||
|
ColourMatrix cm = Complex(1.0,0.0);
|
||||||
|
|
||||||
|
LatticeGaugeField Umu5d(FGrid);
|
||||||
|
|
||||||
|
// replicate across fifth dimension
|
||||||
|
for(int ss=0;ss<Umu._grid->oSites();ss++){
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
Umu5d._odata[Ls*ss+s] = Umu._odata[ss];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
RealD mass=0.1;
|
||||||
|
RealD M5 =1.8;
|
||||||
|
|
||||||
|
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
|
||||||
|
LatticeFermion ssrc(sFGrid);
|
||||||
|
LatticeFermion sref(sFGrid);
|
||||||
|
LatticeFermion sresult(sFGrid);
|
||||||
|
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
|
||||||
|
|
||||||
|
for(int x=0;x<latt4[0];x++){
|
||||||
|
for(int y=0;y<latt4[1];y++){
|
||||||
|
for(int z=0;z<latt4[2];z++){
|
||||||
|
for(int t=0;t<latt4[3];t++){
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
std::vector<int> site({s,x,y,z,t});
|
||||||
|
SpinColourVector tmp;
|
||||||
|
peekSite(tmp,src,site);
|
||||||
|
pokeSite(tmp,ssrc,site);
|
||||||
|
}}}}}
|
||||||
|
|
||||||
|
double t0=usecond();
|
||||||
|
sDw.Dhop(ssrc,sresult,0);
|
||||||
|
double t1=usecond();
|
||||||
|
|
||||||
|
#ifdef TIMERS_OFF
|
||||||
|
int ncall =10;
|
||||||
|
#else
|
||||||
|
int ncall =1+(int) ((5.0*1000*1000)/(t1-t0));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
PerformanceCounter Counter(8);
|
||||||
|
Counter.Start();
|
||||||
|
t0=usecond();
|
||||||
|
for(int i=0;i<ncall;i++){
|
||||||
|
sDw.Dhop(ssrc,sresult,0);
|
||||||
|
}
|
||||||
|
t1=usecond();
|
||||||
|
Counter.Stop();
|
||||||
|
|
||||||
|
if ( report ) {
|
||||||
|
Counter.Report();
|
||||||
|
} else {
|
||||||
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
|
double flops=1344*volume*ncall;
|
||||||
|
std::cout<<"\t"<< flops/(t1-t0);
|
||||||
|
}
|
||||||
|
|
||||||
|
LatticeFermion sr_eo(sFGrid);
|
||||||
|
LatticeFermion serr(sFGrid);
|
||||||
|
|
||||||
|
LatticeFermion ssrc_e (sFrbGrid);
|
||||||
|
LatticeFermion ssrc_o (sFrbGrid);
|
||||||
|
LatticeFermion sr_e (sFrbGrid);
|
||||||
|
LatticeFermion sr_o (sFrbGrid);
|
||||||
|
|
||||||
|
pickCheckerboard(Even,ssrc_e,ssrc);
|
||||||
|
pickCheckerboard(Odd,ssrc_o,ssrc);
|
||||||
|
|
||||||
|
setCheckerboard(sr_eo,ssrc_o);
|
||||||
|
setCheckerboard(sr_eo,ssrc_e);
|
||||||
|
|
||||||
|
sr_e = zero;
|
||||||
|
sr_o = zero;
|
||||||
|
|
||||||
|
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
||||||
|
PerformanceCounter CounterSdw(8);
|
||||||
|
CounterSdw.Start();
|
||||||
|
t0=usecond();
|
||||||
|
for(int i=0;i<ncall;i++){
|
||||||
|
__SSC_START;
|
||||||
|
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
||||||
|
__SSC_STOP;
|
||||||
|
}
|
||||||
|
t1=usecond();
|
||||||
|
CounterSdw.Stop();
|
||||||
|
|
||||||
|
if ( report ) {
|
||||||
|
CounterSdw.Report();
|
||||||
|
} else {
|
||||||
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
|
double flops=(1344.0*volume*ncall)/2;
|
||||||
|
std::cout<<"\t"<< flops/(t1-t0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -1,4 +1,32 @@
|
|||||||
#include <Grid.h>
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./benchmarks/Benchmark_memory_asynch.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
@ -1,4 +1,32 @@
|
|||||||
#include <Grid.h>
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./benchmarks/Benchmark_memory_bandwidth.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
@ -1,4 +1,32 @@
|
|||||||
#include <Grid.h>
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./benchmarks/Benchmark_su3.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
@ -1,4 +1,32 @@
|
|||||||
#include <Grid.h>
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./benchmarks/Benchmark_wilson.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
@ -16,10 +44,15 @@ struct scal {
|
|||||||
Gamma::GammaT
|
Gamma::GammaT
|
||||||
};
|
};
|
||||||
|
|
||||||
|
bool overlapComms = false;
|
||||||
|
|
||||||
int main (int argc, char ** argv)
|
int main (int argc, char ** argv)
|
||||||
{
|
{
|
||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){
|
||||||
|
overlapComms = true;
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<int> latt_size = GridDefaultLatt();
|
std::vector<int> latt_size = GridDefaultLatt();
|
||||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||||
@ -57,11 +90,12 @@ int main (int argc, char ** argv)
|
|||||||
Complex cone(1.0,0.0);
|
Complex cone(1.0,0.0);
|
||||||
for(int nn=0;nn<Nd;nn++){
|
for(int nn=0;nn<Nd;nn++){
|
||||||
random(pRNG,U[nn]);
|
random(pRNG,U[nn]);
|
||||||
if(0) {
|
if(1) {
|
||||||
if (nn==-1) { U[nn]=zero; std::cout<<GridLogMessage << "zeroing gauge field in dir "<<nn<<std::endl; }
|
if (nn!=2) { U[nn]=zero; std::cout<<GridLogMessage << "zeroing gauge field in dir "<<nn<<std::endl; }
|
||||||
else { U[nn] = cone;std::cout<<GridLogMessage << "unit gauge field in dir "<<nn<<std::endl; }
|
// else { U[nn]= cone;std::cout<<GridLogMessage << "unit gauge field in dir "<<nn<<std::endl; }
|
||||||
|
else { std::cout<<GridLogMessage << "random gauge field in dir "<<nn<<std::endl; }
|
||||||
}
|
}
|
||||||
pokeIndex<LorentzIndex>(Umu,U[nn],nn);
|
PokeIndex<LorentzIndex>(Umu,U[nn],nn);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -87,7 +121,11 @@ int main (int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
ref = -0.5*ref;
|
ref = -0.5*ref;
|
||||||
RealD mass=0.1;
|
RealD mass=0.1;
|
||||||
WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
|
|
||||||
|
typename WilsonFermionR::ImplParams params;
|
||||||
|
params.overlapCommsCompute = overlapComms;
|
||||||
|
|
||||||
|
WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params);
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
|
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
|
||||||
int ncall=1000;
|
int ncall=1000;
|
||||||
|
130
benchmarks/Benchmark_wilson_sweep.cc
Normal file
130
benchmarks/Benchmark_wilson_sweep.cc
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
Source file: ./benchmarks/Benchmark_wilson_sweep.cc
|
||||||
|
Copyright (C) 2015
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Richard Rollins <rprollins@users.noreply.github.com>
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
using namespace Grid::QCD;
|
||||||
|
|
||||||
|
// Minimal aggregate holding a single value of type T in its public member `internal`.
template<typename T>
struct scal
{
  T internal;
};
|
||||||
|
|
||||||
|
// Gamma matrix identifiers in the order X, Y, Z, T.
Gamma::GammaMatrix Gmu [] = { Gamma::GammaX, Gamma::GammaY, Gamma::GammaZ, Gamma::GammaT };
|
||||||
|
|
||||||
|
// Set to true by main when "--asynch" is on the command line; copied into
// WilsonFermionR::ImplParams::overlapCommsCompute before the operator is built.
bool overlapComms = false;
|
||||||
|
|
||||||
|
// Forward declaration: times repeated Dw.Dhop(src,result,dag) calls and prints the
// resulting rate (definition follows main).
void bench_wilson ( LatticeFermion & src,
                    LatticeFermion & result,
                    WilsonFermionR & Dw,
                    double const     volume,
                    int const        dag );
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){ overlapComms = true; }
|
||||||
|
typename WilsonFermionR::ImplParams params;
|
||||||
|
params.overlapCommsCompute = overlapComms;
|
||||||
|
|
||||||
|
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||||
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
|
std::vector<int> seeds({1,2,3,4});
|
||||||
|
RealD mass = 0.1;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Benchmarking WilsonFermionR::Dhop "<<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
|
||||||
|
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||||
|
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking Wilson" << std::endl;
|
||||||
|
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Volume\t\t\tWilson/MFLOPs\tWilsonDag/MFLOPs" << std::endl;
|
||||||
|
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
|
||||||
|
|
||||||
|
int Lmax = 32;
|
||||||
|
int dmin = 0;
|
||||||
|
if ( getenv("LMAX") ) Lmax=atoi(getenv("LMAX"));
|
||||||
|
if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN"));
|
||||||
|
for (int L=8; L<=Lmax; L*=2)
|
||||||
|
{
|
||||||
|
std::vector<int> latt_size = std::vector<int>(4,L);
|
||||||
|
for(int d=4; d>dmin; d--)
|
||||||
|
{
|
||||||
|
if ( d<=3 ) { latt_size[d] *= 2; }
|
||||||
|
|
||||||
|
std::cout << GridLogMessage;
|
||||||
|
std::copy( latt_size.begin(), --latt_size.end(), std::ostream_iterator<int>( std::cout, std::string("x").c_str() ) );
|
||||||
|
std::cout << latt_size.back() << "\t\t";
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
|
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
|
||||||
|
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||||
|
LatticeFermion src(&Grid); random(pRNG,src);
|
||||||
|
LatticeFermion result(&Grid); result=zero;
|
||||||
|
|
||||||
|
double volume = std::accumulate(latt_size.begin(),latt_size.end(),1,std::multiplies<int>());
|
||||||
|
|
||||||
|
WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params);
|
||||||
|
|
||||||
|
bench_wilson(src,result,Dw,volume,DaggerNo);
|
||||||
|
bench_wilson(src,result,Dw,volume,DaggerYes);
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Time 1000 back-to-back calls of Dw.Dhop(src,result,dag) and print the rate,
// computed as 1344 * volume * calls divided by the elapsed microseconds, followed
// by two tabs.  No newline is written; the caller terminates the row.
void bench_wilson ( LatticeFermion & src,
                    LatticeFermion & result,
                    WilsonFermionR & Dw,
                    double const     volume,
                    int const        dag )
{
  int ncall = 1000;

  double start = usecond();
  int call = 0;
  while ( call < ncall ) {
    Dw.Dhop(src,result,dag);
    ++call;
  }
  double stop = usecond();

  double flops = 1344 * volume * ncall;
  double rate  = flops/(stop-start);
  std::cout << rate << "\t\t";
}
|
@ -1,27 +0,0 @@
|
|||||||
|
|
||||||
bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_memory_asynch Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_comms_SOURCES=Benchmark_comms.cc
|
|
||||||
Benchmark_comms_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_dwf_SOURCES=Benchmark_dwf.cc
|
|
||||||
Benchmark_dwf_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_memory_asynch_SOURCES=Benchmark_memory_asynch.cc
|
|
||||||
Benchmark_memory_asynch_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_memory_bandwidth_SOURCES=Benchmark_memory_bandwidth.cc
|
|
||||||
Benchmark_memory_bandwidth_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_su3_SOURCES=Benchmark_su3.cc
|
|
||||||
Benchmark_su3_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_wilson_SOURCES=Benchmark_wilson.cc
|
|
||||||
Benchmark_wilson_LDADD=-lGrid
|
|
||||||
|
|
@ -1,8 +1 @@
|
|||||||
# additional include paths necessary to compile the C++ library
|
|
||||||
AM_CXXFLAGS = -I$(top_srcdir)/lib
|
|
||||||
AM_LDFLAGS = -L$(top_builddir)/lib
|
|
||||||
|
|
||||||
#
|
|
||||||
# Test code
|
|
||||||
#
|
|
||||||
include Make.inc
|
include Make.inc
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./benchmarks/simple_su3_expr.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./benchmarks/simple_su3_test.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
13
bootstrap.sh
Executable file
13
bootstrap.sh
Executable file
@ -0,0 +1,13 @@
|
|||||||
|
#!/usr/bin/env bash

# Bootstrap the source tree: fetch the Eigen archive and hand it to
# scripts/update_eigen.sh, then regenerate the Make.inc file lists and the
# configure script.

EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2'
# The downloaded archive is expected in the current directory under the URL's basename.
eigen_archive=$(basename "${EIGEN_URL}")

echo "-- deploying Eigen source..."
wget "${EIGEN_URL}" --no-check-certificate
./scripts/update_eigen.sh "${eigen_archive}"
rm "${eigen_archive}"

echo '-- generating Make.inc files...'
./scripts/filelist
echo '-- generating configure script...'
autoreconf -fvi
|
485
configure.ac
485
configure.ac
@ -1,226 +1,405 @@
|
|||||||
# -*- Autoconf -*-
|
|
||||||
# Process this file with autoconf to produce a configure script.
|
|
||||||
#
|
|
||||||
# Project Grid package
|
|
||||||
#
|
|
||||||
# Time-stamp: <2015-07-10 17:46:21 neo>
|
|
||||||
|
|
||||||
AC_PREREQ([2.63])
|
AC_PREREQ([2.63])
|
||||||
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
|
AC_INIT([Grid], [0.6.0], [https://github.com/paboyle/Grid], [Grid])
|
||||||
AC_CANONICAL_SYSTEM
|
AC_CANONICAL_BUILD
|
||||||
|
AC_CANONICAL_HOST
|
||||||
|
AC_CANONICAL_TARGET
|
||||||
AM_INIT_AUTOMAKE(subdir-objects)
|
AM_INIT_AUTOMAKE(subdir-objects)
|
||||||
AC_CONFIG_MACRO_DIR([m4])
|
AC_CONFIG_MACRO_DIR([m4])
|
||||||
AC_CONFIG_SRCDIR([lib/Grid.h])
|
AC_CONFIG_SRCDIR([lib/Grid.h])
|
||||||
AC_CONFIG_HEADERS([lib/Config.h])
|
AC_CONFIG_HEADERS([lib/Config.h])
|
||||||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||||
|
|
||||||
AC_MSG_NOTICE([
|
############### Checks for programs
|
||||||
|
CXXFLAGS="-O3 $CXXFLAGS"
|
||||||
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
|
||||||
Configuring $PACKAGE v$VERSION for $host
|
|
||||||
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
|
||||||
])
|
|
||||||
|
|
||||||
# Checks for programs.
|
|
||||||
AC_LANG(C++)
|
|
||||||
AC_PROG_CXX
|
AC_PROG_CXX
|
||||||
AC_OPENMP
|
|
||||||
AC_PROG_RANLIB
|
AC_PROG_RANLIB
|
||||||
#AX_CXX_COMPILE_STDCXX_11(noext, mandatory)
|
|
||||||
AX_EXT
|
|
||||||
|
|
||||||
# Checks for libraries.
|
############### Get compiler information
|
||||||
#AX_GCC_VAR_ATTRIBUTE(aligned)
|
AC_LANG([C++])
|
||||||
|
AX_CXX_COMPILE_STDCXX_11([noext],[mandatory])
|
||||||
|
AX_COMPILER_VENDOR
|
||||||
|
AC_DEFINE_UNQUOTED([CXX_COMP_VENDOR],["$ax_cv_cxx_compiler_vendor"],
|
||||||
|
[vendor of C++ compiler that will compile the code])
|
||||||
|
AX_GXX_VERSION
|
||||||
|
AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
|
||||||
|
[version of g++ that will compile the code])
|
||||||
|
|
||||||
# Checks for header files.
|
############### Checks for typedefs, structures, and compiler characteristics
|
||||||
|
AC_TYPE_SIZE_T
|
||||||
|
AC_TYPE_UINT32_T
|
||||||
|
AC_TYPE_UINT64_T
|
||||||
|
|
||||||
|
############### OpenMP
|
||||||
|
AC_OPENMP
|
||||||
|
ac_openmp=no
|
||||||
|
if test "${OPENMP_CXXFLAGS}X" != "X"; then
|
||||||
|
ac_openmp=yes
|
||||||
|
AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS"
|
||||||
|
AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS"
|
||||||
|
fi
|
||||||
|
|
||||||
|
############### Checks for header files
|
||||||
AC_CHECK_HEADERS(stdint.h)
|
AC_CHECK_HEADERS(stdint.h)
|
||||||
AC_CHECK_HEADERS(mm_malloc.h)
|
AC_CHECK_HEADERS(mm_malloc.h)
|
||||||
AC_CHECK_HEADERS(malloc/malloc.h)
|
AC_CHECK_HEADERS(malloc/malloc.h)
|
||||||
AC_CHECK_HEADERS(malloc.h)
|
AC_CHECK_HEADERS(malloc.h)
|
||||||
AC_CHECK_HEADERS(endian.h)
|
AC_CHECK_HEADERS(endian.h)
|
||||||
AC_CHECK_HEADERS(execinfo.h)
|
AC_CHECK_HEADERS(execinfo.h)
|
||||||
AC_CHECK_HEADERS(gmp.h)
|
|
||||||
AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
|
AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
|
||||||
AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
|
AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
|
||||||
|
|
||||||
# Checks for typedefs, structures, and compiler characteristics.
|
############### GMP and MPFR
|
||||||
AC_TYPE_SIZE_T
|
AC_ARG_WITH([gmp],
|
||||||
AC_TYPE_UINT32_T
|
[AS_HELP_STRING([--with-gmp=prefix],
|
||||||
AC_TYPE_UINT64_T
|
[try this for a non-standard install prefix of the GMP library])],
|
||||||
|
[AM_CXXFLAGS="-I$with_gmp/include $AM_CXXFLAGS"]
|
||||||
|
[AM_LDFLAGS="-L$with_gmp/lib $AM_LDFLAGS"])
|
||||||
|
AC_ARG_WITH([mpfr],
|
||||||
|
[AS_HELP_STRING([--with-mpfr=prefix],
|
||||||
|
[try this for a non-standard install prefix of the MPFR library])],
|
||||||
|
[AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
|
||||||
|
[AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
|
||||||
|
|
||||||
# Checks for library functions.
|
############### FFTW3
|
||||||
echo
|
AC_ARG_WITH([fftw],
|
||||||
echo Checking libraries
|
[AS_HELP_STRING([--with-fftw=prefix],
|
||||||
echo :::::::::::::::::::::::::::::::::::::::::::
|
[try this for a non-standard install prefix of the FFTW3 library])],
|
||||||
|
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
|
||||||
|
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
|
||||||
|
|
||||||
|
############### lapack
|
||||||
|
AC_ARG_ENABLE([lapack],
|
||||||
|
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
|
||||||
|
[ac_LAPACK=${enable_lapack}], [ac_LAPACK=no])
|
||||||
|
|
||||||
|
case ${ac_LAPACK} in
|
||||||
|
no)
|
||||||
|
;;
|
||||||
|
yes)
|
||||||
|
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
|
||||||
|
*)
|
||||||
|
AM_CXXFLAGS="-I$ac_LAPACK/include $AM_CXXFLAGS"
|
||||||
|
AM_LDFLAGS="-L$ac_LAPACK/lib $AM_LDFLAGS"
|
||||||
|
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
|
||||||
|
esac
|
||||||
|
|
||||||
|
############### MKL
|
||||||
|
AC_ARG_ENABLE([mkl],
|
||||||
|
[AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])],
|
||||||
|
[ac_MKL=${enable_mkl}], [ac_MKL=no])
|
||||||
|
|
||||||
|
case ${ac_MKL} in
|
||||||
|
no)
|
||||||
|
;;
|
||||||
|
yes)
|
||||||
|
AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);;
|
||||||
|
*)
|
||||||
|
AM_CXXFLAGS="-I$ac_MKL/include $AM_CXXFLAGS"
|
||||||
|
AM_LDFLAGS="-L$ac_MKL/lib $AM_LDFLAGS"
|
||||||
|
AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);;
|
||||||
|
esac
|
||||||
|
|
||||||
|
############### first-touch
|
||||||
|
# The value given to --enable-numa is stored by autoconf in the lower-case shell variable enable_numa
|
||||||
|
AC_ARG_ENABLE([numa],
|
||||||
|
[AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
|
||||||
|
[ac_NUMA=${enable_numa}],[ac_NUMA=no])
|
||||||
|
|
||||||
|
case ${ac_NUMA} in
|
||||||
|
no)
|
||||||
|
;;
|
||||||
|
yes)
|
||||||
|
AC_DEFINE([GRID_NUMA],[1],[First touch numa locality]);;
|
||||||
|
*)
|
||||||
|
AC_DEFINE([GRID_NUMA],[1],[First touch numa locality]);;
|
||||||
|
esac
|
||||||
|
|
||||||
|
############### Checks for library functions
|
||||||
|
CXXFLAGS_CPY=$CXXFLAGS
|
||||||
|
LDFLAGS_CPY=$LDFLAGS
|
||||||
|
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
|
||||||
|
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
|
||||||
|
|
||||||
AC_CHECK_FUNCS([gettimeofday])
|
AC_CHECK_FUNCS([gettimeofday])
|
||||||
|
|
||||||
#AC_CHECK_LIB([gmp],[__gmpf_init],,
|
if test "${ac_MKL}x" != "nox"; then
|
||||||
# [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
|
AC_SEARCH_LIBS([mkl_set_interface_layer], [mkl_rt], [],
|
||||||
#Please install or provide the correct path to your installation
|
[AC_MSG_ERROR("MKL enabled but library not found")])
|
||||||
#Info at: http://www.gmplib.org)])
|
fi
|
||||||
|
|
||||||
#AC_CHECK_LIB([mpfr],[mpfr_init],,
|
AC_SEARCH_LIBS([__gmpf_init], [gmp],
|
||||||
# [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
|
[AC_SEARCH_LIBS([mpfr_init], [mpfr],
|
||||||
#Please install or provide the correct path to your installation
|
[AC_DEFINE([HAVE_LIBMPFR], [1],
|
||||||
#Info at: http://www.mpfr.org/)])
|
[Define to 1 if you have the `MPFR' library])]
|
||||||
|
[have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])]
|
||||||
|
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])]
|
||||||
|
[have_gmp=true])
|
||||||
|
|
||||||
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|IMCI],\
|
if test "${ac_LAPACK}x" != "nox"; then
|
||||||
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
|
AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [],
|
||||||
[ac_SIMD=${enable_simd}],[ac_SIMD=AVX2])
|
[AC_MSG_ERROR("LAPACK enabled but library not found")])
|
||||||
|
fi
|
||||||
|
|
||||||
supported=no
|
AC_SEARCH_LIBS([fftw_execute], [fftw3],
|
||||||
|
[AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [],
|
||||||
|
[AC_MSG_ERROR("single precision FFTW library not found")])]
|
||||||
|
[AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])]
|
||||||
|
[have_fftw=true])
|
||||||
|
|
||||||
|
CXXFLAGS=$CXXFLAGS_CPY
|
||||||
|
LDFLAGS=$LDFLAGS_CPY
|
||||||
|
|
||||||
|
############### SIMD instruction selection
|
||||||
|
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=<code>],
|
||||||
|
[select SIMD target (cf. README.md)])], [ac_SIMD=${enable_simd}], [ac_SIMD=GEN])
|
||||||
|
|
||||||
|
case ${ax_cv_cxx_compiler_vendor} in
|
||||||
|
clang|gnu)
|
||||||
|
case ${ac_SIMD} in
|
||||||
|
SSE4)
|
||||||
|
AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
|
||||||
|
SIMD_FLAGS='-msse4.2';;
|
||||||
|
AVX)
|
||||||
|
AC_DEFINE([AVX1],[1],[AVX intrinsics])
|
||||||
|
SIMD_FLAGS='-mavx';;
|
||||||
|
AVXFMA4)
|
||||||
|
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
|
||||||
|
SIMD_FLAGS='-mavx -mfma4';;
|
||||||
|
AVXFMA)
|
||||||
|
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3])
|
||||||
|
SIMD_FLAGS='-mavx -mfma';;
|
||||||
|
AVX2)
|
||||||
|
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
|
||||||
|
SIMD_FLAGS='-mavx2 -mfma';;
|
||||||
|
AVX512)
|
||||||
|
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
|
||||||
|
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
|
||||||
|
KNC)
|
||||||
|
AC_DEFINE([IMCI],[1],[IMCI intrinsics for Knights Corner])
|
||||||
|
SIMD_FLAGS='';;
|
||||||
|
KNL)
|
||||||
|
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
|
||||||
|
SIMD_FLAGS='-march=knl';;
|
||||||
|
GEN)
|
||||||
|
AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
|
||||||
|
SIMD_FLAGS='';;
|
||||||
|
QPX|BGQ)
|
||||||
|
AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q])
|
||||||
|
SIMD_FLAGS='';;
|
||||||
|
*)
|
||||||
|
AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);;
|
||||||
|
esac;;
|
||||||
|
intel)
|
||||||
|
case ${ac_SIMD} in
|
||||||
|
SSE4)
|
||||||
|
AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
|
||||||
|
SIMD_FLAGS='-msse4.2 -xsse4.2';;
|
||||||
|
AVX)
|
||||||
|
AC_DEFINE([AVX1],[1],[AVX intrinsics])
|
||||||
|
SIMD_FLAGS='-mavx -xavx';;
|
||||||
|
# -mfma enables FMA3, so the macro description says FMA3
|
||||||
|
AVXFMA)
|
||||||
|
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3])
|
||||||
|
SIMD_FLAGS='-mavx -mfma';;
|
||||||
|
AVX2)
|
||||||
|
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
|
||||||
|
SIMD_FLAGS='-march=core-avx2 -xcore-avx2';;
|
||||||
|
AVX512)
|
||||||
|
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
|
||||||
|
SIMD_FLAGS='-xcore-avx512';;
|
||||||
|
KNC)
|
||||||
|
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner])
|
||||||
|
SIMD_FLAGS='';;
|
||||||
|
KNL)
|
||||||
|
AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing])
|
||||||
|
SIMD_FLAGS='-xmic-avx512';;
|
||||||
|
GEN)
|
||||||
|
AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
|
||||||
|
SIMD_FLAGS='';;
|
||||||
|
*)
|
||||||
|
AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the Intel compiler"]);;
|
||||||
|
esac;;
|
||||||
|
*)
|
||||||
|
AC_MSG_WARN([Compiler unknown, using generic vector code])
|
||||||
|
AC_DEFINE([GENERIC_VEC],[1],[generic vector code]);;
|
||||||
|
esac
|
||||||
|
AM_CXXFLAGS="$SIMD_FLAGS $AM_CXXFLAGS"
|
||||||
|
AM_CFLAGS="$SIMD_FLAGS $AM_CFLAGS"
|
||||||
|
|
||||||
case ${ac_SIMD} in
|
case ${ac_SIMD} in
|
||||||
SSE4)
|
AVX512|KNL)
|
||||||
echo Configuring for SSE4
|
AC_DEFINE([TEST_ZMM],[1],[compile ZMM test]);;
|
||||||
AC_DEFINE([SSE4],[1],[SSE4 Intrinsics] )
|
*)
|
||||||
if test x"$ax_cv_support_ssse3_ext" = x"yes"; then dnl minimal support for SSE4
|
;;
|
||||||
supported=yes
|
|
||||||
else
|
|
||||||
AC_MSG_WARN([Your processor does not support SSE4 instructions])
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
AVX)
|
|
||||||
echo Configuring for AVX
|
|
||||||
AC_DEFINE([AVX1],[1],[AVX Intrinsics] )
|
|
||||||
if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX
|
|
||||||
supported=yes
|
|
||||||
else
|
|
||||||
AC_MSG_WARN([Your processor does not support AVX instructions])
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
AVXFMA4)
|
|
||||||
echo Configuring for AVX
|
|
||||||
AC_DEFINE([AVXFMA4],[1],[AVX Intrinsics with FMA4] )
|
|
||||||
if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX
|
|
||||||
supported=yes
|
|
||||||
else
|
|
||||||
AC_MSG_WARN([Your processor does not support AVX instructions])
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
AVX2)
|
|
||||||
echo Configuring for AVX2
|
|
||||||
AC_DEFINE([AVX2],[1],[AVX2 Intrinsics] )
|
|
||||||
if test x"$ax_cv_support_avx2_ext" = x"yes"; then dnl minimal support for AVX2
|
|
||||||
supported=yes
|
|
||||||
else
|
|
||||||
AC_MSG_WARN([Your processor does not support AVX2 instructions])
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
AVX512)
|
|
||||||
echo Configuring for AVX512
|
|
||||||
AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Landing] )
|
|
||||||
supported="cross compilation"
|
|
||||||
;;
|
|
||||||
IMCI)
|
|
||||||
echo Configuring for IMCI
|
|
||||||
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] )
|
|
||||||
supported="cross compilation"
|
|
||||||
;;
|
|
||||||
NEONv8)
|
|
||||||
echo Configuring for experimental ARMv8a support
|
|
||||||
AC_DEFINE([NEONv8],[1],[NEON ARMv8 Experimental support ] )
|
|
||||||
supported="cross compilation"
|
|
||||||
;;
|
|
||||||
DEBUG)
|
|
||||||
echo Configuring without SIMD support - only for compiler DEBUGGING!
|
|
||||||
AC_DEFINE([EMPTY_SIMD],[1],[EMPTY_SIMD only for DEBUGGING] )
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
AC_MSG_ERROR([${ac_SIMD} flag unsupported as --enable-simd option\nRun ./configure --help for the list of options]);
|
|
||||||
;;
|
|
||||||
esac
|
esac
|
||||||
|
|
||||||
AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
|
############### Precision selection
|
||||||
|
AC_ARG_ENABLE([precision],
|
||||||
|
[AC_HELP_STRING([--enable-precision=single|double],
|
||||||
|
[Select default word size of Real])],
|
||||||
|
[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
|
||||||
|
|
||||||
case ${ac_PRECISION} in
|
case ${ac_PRECISION} in
|
||||||
single)
|
single)
|
||||||
echo default precision is single
|
|
||||||
AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
|
AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
|
||||||
;;
|
;;
|
||||||
double)
|
double)
|
||||||
echo default precision is double
|
|
||||||
AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
|
AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
|
############### communication type selection
|
||||||
|
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem],
|
||||||
|
[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
|
||||||
|
|
||||||
case ${ac_COMMS} in
|
case ${ac_COMMS} in
|
||||||
none)
|
none)
|
||||||
echo Configuring for NO communications
|
AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] )
|
||||||
AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] )
|
comms_type='none'
|
||||||
;;
|
;;
|
||||||
mpi)
|
mpi3l*)
|
||||||
echo Configuring for MPI communications
|
AC_DEFINE([GRID_COMMS_MPI3L],[1],[GRID_COMMS_MPI3L] )
|
||||||
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
|
comms_type='mpi3l'
|
||||||
|
;;
|
||||||
|
mpi3*)
|
||||||
|
AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] )
|
||||||
|
comms_type='mpi3'
|
||||||
|
;;
|
||||||
|
mpi*)
|
||||||
|
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
|
||||||
|
comms_type='mpi'
|
||||||
|
;;
|
||||||
|
shmem)
|
||||||
|
AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] )
|
||||||
|
comms_type='shmem'
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
|
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
case ${ac_COMMS} in
|
||||||
|
*-auto)
|
||||||
|
LX_FIND_MPI
|
||||||
|
if test "x$have_CXX_mpi" = 'xno'; then AC_MSG_ERROR(["MPI not found"]); fi
|
||||||
|
AM_CXXFLAGS="$MPI_CXXFLAGS $AM_CXXFLAGS"
|
||||||
|
AM_CFLAGS="$MPI_CFLAGS $AM_CFLAGS"
|
||||||
|
AM_LDFLAGS="`echo $MPI_CXXLDFLAGS | sed -E 's/-l@<:@^ @:>@+//g'` $AM_LDFLAGS"
|
||||||
|
LIBS="`echo $MPI_CXXLDFLAGS | sed -E 's/-L@<:@^ @:>@+//g'` $LIBS";;
|
||||||
|
*)
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
# Pick the communicator implementation to build; compare with the POSIX "=" operator because "==" is not accepted by every /bin/sh
|
||||||
|
AM_CONDITIONAL(BUILD_COMMS_SHMEM, [ test "${comms_type}X" = "shmemX" ])
|
||||||
|
AM_CONDITIONAL(BUILD_COMMS_MPI, [ test "${comms_type}X" = "mpiX" ])
|
||||||
|
AM_CONDITIONAL(BUILD_COMMS_MPI3, [ test "${comms_type}X" = "mpi3X" ] )
|
||||||
|
AM_CONDITIONAL(BUILD_COMMS_MPI3L, [ test "${comms_type}X" = "mpi3lX" ] )
|
||||||
|
AM_CONDITIONAL(BUILD_COMMS_NONE, [ test "${comms_type}X" = "noneX" ])
|
||||||
|
|
||||||
|
############### RNG selection
|
||||||
|
AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\
|
||||||
|
[Select Random Number Generator to be used])],\
|
||||||
|
[ac_RNG=${enable_rng}],[ac_RNG=ranlux48])
|
||||||
|
|
||||||
|
case ${ac_RNG} in
|
||||||
|
ranlux48)
|
||||||
|
AC_DEFINE([RNG_RANLUX],[1],[RNG_RANLUX] )
|
||||||
|
;;
|
||||||
|
mt19937)
|
||||||
|
AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] )
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" ])
|
############### Timer option
|
||||||
AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])
|
AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers],\
|
||||||
|
[Enable system dependent high res timers])],\
|
||||||
|
[ac_TIMERS=${enable_timers}],[ac_TIMERS=yes])
|
||||||
|
|
||||||
AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],[Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no)
|
case ${ac_TIMERS} in
|
||||||
|
|
||||||
case ${ac_CHROMA} in
|
|
||||||
yes)
|
yes)
|
||||||
echo Enabling tests regressing to Chroma
|
AC_DEFINE([TIMERS_ON],[1],[TIMERS_ON] )
|
||||||
;;
|
;;
|
||||||
no)
|
no)
|
||||||
echo Disabling tests regressing to Chroma
|
AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
|
AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
############### Chroma regression test
|
||||||
|
# Use the value the user passed so that --disable-chroma and --enable-chroma=no switch the regression tests off
|
||||||
|
AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],
|
||||||
|
[Expect chroma compiled under c++11 ])],[ac_CHROMA=${enable_chroma}],[ac_CHROMA=no])
|
||||||
|
|
||||||
|
case ${ac_CHROMA} in
|
||||||
|
yes|no)
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" == "XyesX" ])
|
# Build the Chroma regression tests only when requested; "=" is the portable string comparison for test
|
||||||
|
AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" = "XyesX" ])
|
||||||
|
|
||||||
###################################################################
|
############### Doxygen
|
||||||
# Checks for doxygen support
|
AC_PROG_DOXYGEN
|
||||||
# if present enables the "make doxyfile" command
|
|
||||||
#echo
|
|
||||||
#echo Checking doxygen support
|
|
||||||
#echo :::::::::::::::::::::::::::::::::::::::::::
|
|
||||||
#AC_PROG_DOXYGEN
|
|
||||||
|
|
||||||
#if test -n "$DOXYGEN"
|
if test -n "$DOXYGEN"
|
||||||
#then
|
then
|
||||||
#AC_CONFIG_FILES([docs/doxy.cfg])
|
AC_CONFIG_FILES([docs/doxy.cfg])
|
||||||
#fi
|
fi
|
||||||
|
|
||||||
echo
|
############### Output
|
||||||
echo Creating configuration files
|
cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd}
|
||||||
echo :::::::::::::::::::::::::::::::::::::::::::
|
AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS"
|
||||||
|
AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS"
|
||||||
|
AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS"
|
||||||
|
AC_SUBST([AM_CFLAGS])
|
||||||
|
AC_SUBST([AM_CXXFLAGS])
|
||||||
|
AC_SUBST([AM_LDFLAGS])
|
||||||
AC_CONFIG_FILES(Makefile)
|
AC_CONFIG_FILES(Makefile)
|
||||||
AC_CONFIG_FILES(lib/Makefile)
|
AC_CONFIG_FILES(lib/Makefile)
|
||||||
AC_CONFIG_FILES(tests/Makefile)
|
AC_CONFIG_FILES(tests/Makefile)
|
||||||
|
AC_CONFIG_FILES(tests/IO/Makefile)
|
||||||
|
AC_CONFIG_FILES(tests/core/Makefile)
|
||||||
|
AC_CONFIG_FILES(tests/debug/Makefile)
|
||||||
|
AC_CONFIG_FILES(tests/forces/Makefile)
|
||||||
|
AC_CONFIG_FILES(tests/hmc/Makefile)
|
||||||
|
AC_CONFIG_FILES(tests/solver/Makefile)
|
||||||
AC_CONFIG_FILES(tests/qdpxx/Makefile)
|
AC_CONFIG_FILES(tests/qdpxx/Makefile)
|
||||||
AC_CONFIG_FILES(benchmarks/Makefile)
|
AC_CONFIG_FILES(benchmarks/Makefile)
|
||||||
AC_OUTPUT
|
AC_OUTPUT
|
||||||
|
|
||||||
|
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
echo "
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
Summary of configuration for $PACKAGE v$VERSION
|
Summary of configuration for $PACKAGE v$VERSION
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
The following features are enabled:
|
----- PLATFORM ----------------------------------------
|
||||||
|
architecture (build) : $build_cpu
|
||||||
- architecture (build) : $build_cpu
|
os (build) : $build_os
|
||||||
- os (build) : $build_os
|
architecture (target) : $target_cpu
|
||||||
- architecture (target) : $target_cpu
|
os (target) : $target_os
|
||||||
- os (target) : $target_os
|
compiler vendor : ${ax_cv_cxx_compiler_vendor}
|
||||||
- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
|
compiler version : ${ax_cv_gxx_version}
|
||||||
- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
|
----- BUILD OPTIONS -----------------------------------
|
||||||
- Supported SIMD flags : $SIMD_FLAGS
|
SIMD : ${ac_SIMD}
|
||||||
----------------------------------------------------------
|
Threading : ${ac_openmp}
|
||||||
- enabled simd support : ${ac_SIMD} (supported: $supported )
|
Communications type : ${comms_type}
|
||||||
- communications type : ${ac_COMMS}
|
Default precision : ${ac_PRECISION}
|
||||||
|
RNG choice : ${ac_RNG}
|
||||||
|
GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
|
||||||
"
|
LAPACK : ${ac_LAPACK}
|
||||||
|
FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
|
||||||
|
build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
|
||||||
|
graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
|
||||||
|
----- BUILD FLAGS -------------------------------------
|
||||||
|
CXXFLAGS:
|
||||||
|
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
||||||
|
LDFLAGS:
|
||||||
|
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
||||||
|
LIBS:
|
||||||
|
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
||||||
|
-------------------------------------------------------" > config.summary
|
||||||
|
echo ""
|
||||||
|
cat config.summary
|
||||||
|
echo ""
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./gcc-bug-report/broken.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <complex>
|
#include <complex>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
1
include/Grid
Symbolic link
1
include/Grid
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../lib
|
@ -1,27 +1,56 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/Algorithms.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_ALGORITHMS_H
|
#ifndef GRID_ALGORITHMS_H
|
||||||
#define GRID_ALGORITHMS_H
|
#define GRID_ALGORITHMS_H
|
||||||
|
|
||||||
#include <algorithms/SparseMatrix.h>
|
#include <Grid/algorithms/SparseMatrix.h>
|
||||||
#include <algorithms/LinearOperator.h>
|
#include <Grid/algorithms/LinearOperator.h>
|
||||||
#include <algorithms/Preconditioner.h>
|
#include <Grid/algorithms/Preconditioner.h>
|
||||||
|
|
||||||
#include <algorithms/approx/Zolotarev.h>
|
#include <Grid/algorithms/approx/Zolotarev.h>
|
||||||
#include <algorithms/approx/Chebyshev.h>
|
#include <Grid/algorithms/approx/Chebyshev.h>
|
||||||
#include <algorithms/approx/Remez.h>
|
#include <Grid/algorithms/approx/Remez.h>
|
||||||
#include <algorithms/approx/MultiShiftFunction.h>
|
#include <Grid/algorithms/approx/MultiShiftFunction.h>
|
||||||
|
|
||||||
#include <algorithms/iterative/ConjugateGradient.h>
|
#include <Grid/algorithms/iterative/ConjugateGradient.h>
|
||||||
#include <algorithms/iterative/ConjugateResidual.h>
|
#include <Grid/algorithms/iterative/ConjugateResidual.h>
|
||||||
#include <algorithms/iterative/NormalEquations.h>
|
#include <Grid/algorithms/iterative/NormalEquations.h>
|
||||||
#include <algorithms/iterative/SchurRedBlack.h>
|
#include <Grid/algorithms/iterative/SchurRedBlack.h>
|
||||||
|
|
||||||
#include <algorithms/iterative/ConjugateGradientMultiShift.h>
|
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
|
||||||
|
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
|
||||||
|
|
||||||
// Lanczos support
|
// Lanczos support
|
||||||
#include <algorithms/iterative/MatrixUtils.h>
|
#include <Grid/algorithms/iterative/MatrixUtils.h>
|
||||||
#include <algorithms/iterative/ImplicitlyRestartedLanczos.h>
|
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
|
||||||
|
|
||||||
#include <algorithms/CoarsenedMatrix.h>
|
#include <Grid/algorithms/CoarsenedMatrix.h>
|
||||||
|
|
||||||
// Eigen/lanczos
|
// Eigen/lanczos
|
||||||
// EigCg
|
// EigCg
|
||||||
|
@ -1,3 +1,31 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/AlignedAllocator.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_ALIGNED_ALLOCATOR_H
|
#ifndef GRID_ALIGNED_ALLOCATOR_H
|
||||||
#define GRID_ALIGNED_ALLOCATOR_H
|
#define GRID_ALIGNED_ALLOCATOR_H
|
||||||
|
|
||||||
@ -8,7 +36,6 @@
|
|||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <immintrin.h>
|
|
||||||
#ifdef HAVE_MM_MALLOC_H
|
#ifdef HAVE_MM_MALLOC_H
|
||||||
#include <mm_malloc.h>
|
#include <mm_malloc.h>
|
||||||
#endif
|
#endif
|
||||||
@ -30,27 +57,28 @@ public:
|
|||||||
typedef _Tp value_type;
|
typedef _Tp value_type;
|
||||||
|
|
||||||
template<typename _Tp1> struct rebind { typedef alignedAllocator<_Tp1> other; };
|
template<typename _Tp1> struct rebind { typedef alignedAllocator<_Tp1> other; };
|
||||||
|
|
||||||
alignedAllocator() throw() { }
|
alignedAllocator() throw() { }
|
||||||
|
|
||||||
alignedAllocator(const alignedAllocator&) throw() { }
|
alignedAllocator(const alignedAllocator&) throw() { }
|
||||||
|
|
||||||
template<typename _Tp1> alignedAllocator(const alignedAllocator<_Tp1>&) throw() { }
|
template<typename _Tp1> alignedAllocator(const alignedAllocator<_Tp1>&) throw() { }
|
||||||
|
|
||||||
~alignedAllocator() throw() { }
|
~alignedAllocator() throw() { }
|
||||||
|
|
||||||
pointer address(reference __x) const { return &__x; }
|
pointer address(reference __x) const { return &__x; }
|
||||||
// const_pointer address(const_reference __x) const { return &__x; }
|
|
||||||
|
|
||||||
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
||||||
|
|
||||||
pointer allocate(size_type __n, const void* = 0)
|
pointer allocate(size_type __n, const void* _p= 0)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_MM_MALLOC_H
|
#ifdef HAVE_MM_MALLOC_H
|
||||||
_Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
|
_Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
|
||||||
#else
|
#else
|
||||||
_Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
|
_Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
_Tp tmp;
|
||||||
|
#ifdef GRID_NUMA
|
||||||
|
#pragma omp parallel for schedule(static)
|
||||||
|
for(int i=0;i<__n;i++){
|
||||||
|
ptr[i]=tmp;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -63,15 +91,101 @@ public:
|
|||||||
}
|
}
|
||||||
void construct(pointer __p, const _Tp& __val) { };
|
void construct(pointer __p, const _Tp& __val) { };
|
||||||
void construct(pointer __p) { };
|
void construct(pointer __p) { };
|
||||||
|
|
||||||
void destroy(pointer __p) { };
|
void destroy(pointer __p) { };
|
||||||
};
|
};
|
||||||
|
// Equality operators for alignedAllocator: two allocators of the same element type always compare equal.
template<typename _Tp> inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
|
||||||
|
template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
||||||
|
|
||||||
template<typename _Tp> inline bool
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
|
// MPI3 : comms must use shm region
|
||||||
|
// SHMEM: comms must use symmetric heap
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
#ifdef GRID_COMMS_SHMEM
|
||||||
|
extern "C" {
|
||||||
|
#include <mpp/shmem.h>
|
||||||
|
extern void * shmem_align(size_t, size_t);
|
||||||
|
extern void shmem_free(void *);
|
||||||
|
}
|
||||||
|
#define PARANOID_SYMMETRIC_HEAP
|
||||||
|
#endif
|
||||||
|
|
||||||
template<typename _Tp> inline bool
|
template<typename _Tp>
|
||||||
operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
class commAllocator {
|
||||||
|
public:
|
||||||
|
typedef std::size_t size_type;
|
||||||
|
typedef std::ptrdiff_t difference_type;
|
||||||
|
typedef _Tp* pointer;
|
||||||
|
typedef const _Tp* const_pointer;
|
||||||
|
typedef _Tp& reference;
|
||||||
|
typedef const _Tp& const_reference;
|
||||||
|
typedef _Tp value_type;
|
||||||
|
|
||||||
|
template<typename _Tp1> struct rebind { typedef commAllocator<_Tp1> other; };
|
||||||
|
commAllocator() throw() { }
|
||||||
|
commAllocator(const commAllocator&) throw() { }
|
||||||
|
template<typename _Tp1> commAllocator(const commAllocator<_Tp1>&) throw() { }
|
||||||
|
~commAllocator() throw() { }
|
||||||
|
pointer address(reference __x) const { return &__x; }
|
||||||
|
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
||||||
|
|
||||||
|
#ifdef GRID_COMMS_SHMEM
|
||||||
|
pointer allocate(size_type __n, const void* _p= 0)
|
||||||
|
{
|
||||||
|
#ifdef CRAY
|
||||||
|
_Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64);
|
||||||
|
#else
|
||||||
|
_Tp *ptr = (_Tp *) shmem_align(64,__n*sizeof(_Tp));
|
||||||
|
#endif
|
||||||
|
#ifdef PARANOID_SYMMETRIC_HEAP
|
||||||
|
static void * bcast;
|
||||||
|
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
||||||
|
|
||||||
|
bcast = (void *) ptr;
|
||||||
|
shmem_broadcast32((void *)&bcast,(void *)&bcast,sizeof(void *)/4,0,0,0,shmem_n_pes(),psync);
|
||||||
|
|
||||||
|
if ( bcast != ptr ) {
|
||||||
|
std::printf("inconsistent alloc pe %d %lx %lx \n",shmem_my_pe(),bcast,ptr);std::fflush(stdout);
|
||||||
|
// BACKTRACEFILE();
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
assert( bcast == (void *) ptr);
|
||||||
|
#endif
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
void deallocate(pointer __p, size_type) {
|
||||||
|
shmem_free((void *)__p);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
pointer allocate(size_type __n, const void* _p= 0)
|
||||||
|
{
|
||||||
|
#ifdef HAVE_MM_MALLOC_H
|
||||||
|
_Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
|
||||||
|
#else
|
||||||
|
_Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
|
||||||
|
#endif
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
void deallocate(pointer __p, size_type) {
|
||||||
|
#ifdef HAVE_MM_MALLOC_H
|
||||||
|
_mm_free((void *)__p);
|
||||||
|
#else
|
||||||
|
free((void *)__p);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
void construct(pointer __p, const _Tp& __val) { };
|
||||||
|
void construct(pointer __p) { };
|
||||||
|
void destroy(pointer __p) { };
|
||||||
|
};
|
||||||
|
template<typename _Tp> inline bool operator==(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return true; }
|
||||||
|
template<typename _Tp> inline bool operator!=(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return false; }
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Template typedefs
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
template<class T> using Vector = std::vector<T,alignedAllocator<T> >;
|
||||||
|
template<class T> using commVector = std::vector<T,commAllocator<T> >;
|
||||||
|
template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >;
|
||||||
|
|
||||||
}; // namespace Grid
|
}; // namespace Grid
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,8 +1,35 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/Cartesian.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_CARTESIAN_H
|
#ifndef GRID_CARTESIAN_H
|
||||||
#define GRID_CARTESIAN_H
|
#define GRID_CARTESIAN_H
|
||||||
|
|
||||||
#include <cartesian/Cartesian_base.h>
|
#include <Grid/cartesian/Cartesian_base.h>
|
||||||
#include <cartesian/Cartesian_full.h>
|
#include <Grid/cartesian/Cartesian_full.h>
|
||||||
#include <cartesian/Cartesian_red_black.h>
|
#include <Grid/cartesian/Cartesian_red_black.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,6 +1,33 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/Communicator.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_COMMUNICATOR_H
|
#ifndef GRID_COMMUNICATOR_H
|
||||||
#define GRID_COMMUNICATOR_H
|
#define GRID_COMMUNICATOR_H
|
||||||
|
|
||||||
#include <communicator/Communicator_base.h>
|
#include <Grid/communicator/Communicator_base.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
174
lib/Config.h.in
174
lib/Config.h.in
@ -1,174 +0,0 @@
|
|||||||
/* lib/Config.h.in. Generated from configure.ac by autoheader. */
|
|
||||||
|
|
||||||
/* AVX Intrinsics */
|
|
||||||
#undef AVX1
|
|
||||||
|
|
||||||
/* AVX2 Intrinsics */
|
|
||||||
#undef AVX2
|
|
||||||
|
|
||||||
/* AVX512 Intrinsics for Knights Landing */
|
|
||||||
#undef AVX512
|
|
||||||
|
|
||||||
/* AVX Intrinsics with FMA4 */
|
|
||||||
#undef AVXFMA4
|
|
||||||
|
|
||||||
/* EMPTY_SIMD only for DEBUGGING */
|
|
||||||
#undef EMPTY_SIMD
|
|
||||||
|
|
||||||
/* GRID_COMMS_MPI */
|
|
||||||
#undef GRID_COMMS_MPI
|
|
||||||
|
|
||||||
/* GRID_COMMS_NONE */
|
|
||||||
#undef GRID_COMMS_NONE
|
|
||||||
|
|
||||||
/* GRID_DEFAULT_PRECISION is DOUBLE */
|
|
||||||
#undef GRID_DEFAULT_PRECISION_DOUBLE
|
|
||||||
|
|
||||||
/* GRID_DEFAULT_PRECISION is SINGLE */
|
|
||||||
#undef GRID_DEFAULT_PRECISION_SINGLE
|
|
||||||
|
|
||||||
/* Support Altivec instructions */
|
|
||||||
#undef HAVE_ALTIVEC
|
|
||||||
|
|
||||||
/* Support AVX (Advanced Vector Extensions) instructions */
|
|
||||||
#undef HAVE_AVX
|
|
||||||
|
|
||||||
/* Support AVX2 (Advanced Vector Extensions 2) instructions */
|
|
||||||
#undef HAVE_AVX2
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
|
|
||||||
don't. */
|
|
||||||
#undef HAVE_DECL_BE64TOH
|
|
||||||
|
|
||||||
/* Define to 1 if you have the declaration of `ntohll', and to 0 if you don't.
|
|
||||||
*/
|
|
||||||
#undef HAVE_DECL_NTOHLL
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <endian.h> header file. */
|
|
||||||
#undef HAVE_ENDIAN_H
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <execinfo.h> header file. */
|
|
||||||
#undef HAVE_EXECINFO_H
|
|
||||||
|
|
||||||
/* Support FMA3 (Fused Multiply-Add) instructions */
|
|
||||||
#undef HAVE_FMA
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `gettimeofday' function. */
|
|
||||||
#undef HAVE_GETTIMEOFDAY
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <gmp.h> header file. */
|
|
||||||
#undef HAVE_GMP_H
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
|
||||||
#undef HAVE_INTTYPES_H
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <malloc.h> header file. */
|
|
||||||
#undef HAVE_MALLOC_H
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <malloc/malloc.h> header file. */
|
|
||||||
#undef HAVE_MALLOC_MALLOC_H
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <memory.h> header file. */
|
|
||||||
#undef HAVE_MEMORY_H
|
|
||||||
|
|
||||||
/* Support mmx instructions */
|
|
||||||
#undef HAVE_MMX
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <mm_malloc.h> header file. */
|
|
||||||
#undef HAVE_MM_MALLOC_H
|
|
||||||
|
|
||||||
/* Support SSE (Streaming SIMD Extensions) instructions */
|
|
||||||
#undef HAVE_SSE
|
|
||||||
|
|
||||||
/* Support SSE2 (Streaming SIMD Extensions 2) instructions */
|
|
||||||
#undef HAVE_SSE2
|
|
||||||
|
|
||||||
/* Support SSE3 (Streaming SIMD Extensions 3) instructions */
|
|
||||||
#undef HAVE_SSE3
|
|
||||||
|
|
||||||
/* Support SSE4.1 (Streaming SIMD Extensions 4.1) instructions */
|
|
||||||
#undef HAVE_SSE4_1
|
|
||||||
|
|
||||||
/* Support SSE4.2 (Streaming SIMD Extensions 4.2) instructions */
|
|
||||||
#undef HAVE_SSE4_2
|
|
||||||
|
|
||||||
/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */
|
|
||||||
#undef HAVE_SSSE3
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <stdint.h> header file. */
|
|
||||||
#undef HAVE_STDINT_H
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
|
||||||
#undef HAVE_STDLIB_H
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <strings.h> header file. */
|
|
||||||
#undef HAVE_STRINGS_H
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <string.h> header file. */
|
|
||||||
#undef HAVE_STRING_H
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
|
||||||
#undef HAVE_SYS_STAT_H
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
|
||||||
#undef HAVE_SYS_TYPES_H
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <unistd.h> header file. */
|
|
||||||
#undef HAVE_UNISTD_H
|
|
||||||
|
|
||||||
/* IMCI Intrinsics for Knights Corner */
|
|
||||||
#undef IMCI
|
|
||||||
|
|
||||||
/* NEON ARMv8 Experimental support */
|
|
||||||
#undef NEONv8
|
|
||||||
|
|
||||||
/* Name of package */
|
|
||||||
#undef PACKAGE
|
|
||||||
|
|
||||||
/* Define to the address where bug reports for this package should be sent. */
|
|
||||||
#undef PACKAGE_BUGREPORT
|
|
||||||
|
|
||||||
/* Define to the full name of this package. */
|
|
||||||
#undef PACKAGE_NAME
|
|
||||||
|
|
||||||
/* Define to the full name and version of this package. */
|
|
||||||
#undef PACKAGE_STRING
|
|
||||||
|
|
||||||
/* Define to the one symbol short name of this package. */
|
|
||||||
#undef PACKAGE_TARNAME
|
|
||||||
|
|
||||||
/* Define to the home page for this package. */
|
|
||||||
#undef PACKAGE_URL
|
|
||||||
|
|
||||||
/* Define to the version of this package. */
|
|
||||||
#undef PACKAGE_VERSION
|
|
||||||
|
|
||||||
/* SSE4 Intrinsics */
|
|
||||||
#undef SSE4
|
|
||||||
|
|
||||||
/* Define to 1 if you have the ANSI C header files. */
|
|
||||||
#undef STDC_HEADERS
|
|
||||||
|
|
||||||
/* Version number of package */
|
|
||||||
#undef VERSION
|
|
||||||
|
|
||||||
/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
|
|
||||||
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
|
|
||||||
#define below would cause a syntax error. */
|
|
||||||
#undef _UINT32_T
|
|
||||||
|
|
||||||
/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
|
|
||||||
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
|
|
||||||
#define below would cause a syntax error. */
|
|
||||||
#undef _UINT64_T
|
|
||||||
|
|
||||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
|
||||||
#undef size_t
|
|
||||||
|
|
||||||
/* Define to the type of an unsigned integer type of width exactly 32 bits if
|
|
||||||
such a type exists and the standard includes do not define it. */
|
|
||||||
#undef uint32_t
|
|
||||||
|
|
||||||
/* Define to the type of an unsigned integer type of width exactly 64 bits if
|
|
||||||
such a type exists and the standard includes do not define it. */
|
|
||||||
#undef uint64_t
|
|
45
lib/Cshift.h
45
lib/Cshift.h
@ -1,13 +1,52 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/Cshift.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef _GRID_CSHIFT_H_
|
#ifndef _GRID_CSHIFT_H_
|
||||||
#define _GRID_CSHIFT_H_
|
#define _GRID_CSHIFT_H_
|
||||||
|
|
||||||
#include <cshift/Cshift_common.h>
|
#include <Grid/cshift/Cshift_common.h>
|
||||||
|
|
||||||
#ifdef GRID_COMMS_NONE
|
#ifdef GRID_COMMS_NONE
|
||||||
#include <cshift/Cshift_none.h>
|
#include <Grid/cshift/Cshift_none.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef GRID_COMMS_MPI
|
#ifdef GRID_COMMS_MPI
|
||||||
#include <cshift/Cshift_mpi.h>
|
#include <Grid/cshift/Cshift_mpi.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef GRID_COMMS_MPI3
|
||||||
|
#include <Grid/cshift/Cshift_mpi.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef GRID_COMMS_MPI3L
|
||||||
|
#include <Grid/cshift/Cshift_mpi.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef GRID_COMMS_SHMEM
|
||||||
|
#include <Grid/cshift/Cshift_mpi.h> // uses same implementation of communicator
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
302
lib/FFT.h
Normal file
302
lib/FFT.h
Normal file
@ -0,0 +1,302 @@
|
|||||||
|
|
||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/FFT.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#ifndef _GRID_FFT_H_
|
||||||
|
#define _GRID_FFT_H_
|
||||||
|
|
||||||
|
#ifdef HAVE_FFTW
|
||||||
|
#ifdef USE_MKL
|
||||||
|
#include <fftw/fftw3.h>
|
||||||
|
#else
|
||||||
|
#include <fftw3.h>
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
template<class scalar> struct FFTW { };
|
||||||
|
|
||||||
|
#ifdef HAVE_FFTW
|
||||||
|
template<> struct FFTW<ComplexD> {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef fftw_complex FFTW_scalar;
|
||||||
|
typedef fftw_plan FFTW_plan;
|
||||||
|
|
||||||
|
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
|
||||||
|
FFTW_scalar *in, const int *inembed,
|
||||||
|
int istride, int idist,
|
||||||
|
FFTW_scalar *out, const int *onembed,
|
||||||
|
int ostride, int odist,
|
||||||
|
int sign, unsigned flags) {
|
||||||
|
return ::fftw_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
|
||||||
|
::fftw_flops(p,add,mul,fmas);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
|
||||||
|
::fftw_execute_dft(p,in,out);
|
||||||
|
}
|
||||||
|
inline static void fftw_destroy_plan(const FFTW_plan p) {
|
||||||
|
::fftw_destroy_plan(p);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> struct FFTW<ComplexF> {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef fftwf_complex FFTW_scalar;
|
||||||
|
typedef fftwf_plan FFTW_plan;
|
||||||
|
|
||||||
|
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
|
||||||
|
FFTW_scalar *in, const int *inembed,
|
||||||
|
int istride, int idist,
|
||||||
|
FFTW_scalar *out, const int *onembed,
|
||||||
|
int ostride, int odist,
|
||||||
|
int sign, unsigned flags) {
|
||||||
|
return ::fftwf_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
|
||||||
|
::fftwf_flops(p,add,mul,fmas);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
|
||||||
|
::fftwf_execute_dft(p,in,out);
|
||||||
|
}
|
||||||
|
inline static void fftw_destroy_plan(const FFTW_plan p) {
|
||||||
|
::fftwf_destroy_plan(p);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef FFTW_FORWARD
|
||||||
|
#define FFTW_FORWARD (-1)
|
||||||
|
#define FFTW_BACKWARD (+1)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
class FFT {
|
||||||
|
private:
|
||||||
|
|
||||||
|
GridCartesian *vgrid;
|
||||||
|
GridCartesian *sgrid;
|
||||||
|
|
||||||
|
int Nd;
|
||||||
|
double flops;
|
||||||
|
double flops_call;
|
||||||
|
uint64_t usec;
|
||||||
|
|
||||||
|
std::vector<int> dimensions;
|
||||||
|
std::vector<int> processors;
|
||||||
|
std::vector<int> processor_coor;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
static const int forward=FFTW_FORWARD;
|
||||||
|
static const int backward=FFTW_BACKWARD;
|
||||||
|
|
||||||
|
double Flops(void) {return flops;}
|
||||||
|
double MFlops(void) {return flops/usec;}
|
||||||
|
double USec(void) {return (double)usec;}
|
||||||
|
|
||||||
|
FFT ( GridCartesian * grid ) :
|
||||||
|
vgrid(grid),
|
||||||
|
Nd(grid->_ndimension),
|
||||||
|
dimensions(grid->_fdimensions),
|
||||||
|
processors(grid->_processors),
|
||||||
|
processor_coor(grid->_processor_coor)
|
||||||
|
{
|
||||||
|
flops=0;
|
||||||
|
usec =0;
|
||||||
|
std::vector<int> layout(Nd,1);
|
||||||
|
sgrid = new GridCartesian(dimensions,layout,processors);
|
||||||
|
};
|
||||||
|
|
||||||
|
~FFT ( void) {
|
||||||
|
delete sgrid;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
void FFT_dim_mask(Lattice<vobj> &result,const Lattice<vobj> &source,std::vector<int> mask,int sign){
|
||||||
|
|
||||||
|
conformable(result._grid,vgrid);
|
||||||
|
conformable(source._grid,vgrid);
|
||||||
|
Lattice<vobj> tmp(vgrid);
|
||||||
|
tmp = source;
|
||||||
|
for(int d=0;d<Nd;d++){
|
||||||
|
if( mask[d] ) {
|
||||||
|
FFT_dim(result,tmp,d,sign);
|
||||||
|
tmp=result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
void FFT_all_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int sign){
|
||||||
|
std::vector<int> mask(Nd,1);
|
||||||
|
FFT_dim_mask(result,source,mask,sign);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
void FFT_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int dim, int sign){
|
||||||
|
#ifndef HAVE_FFTW
|
||||||
|
assert(0);
|
||||||
|
#else
|
||||||
|
conformable(result._grid,vgrid);
|
||||||
|
conformable(source._grid,vgrid);
|
||||||
|
|
||||||
|
int L = vgrid->_ldimensions[dim];
|
||||||
|
int G = vgrid->_fdimensions[dim];
|
||||||
|
|
||||||
|
std::vector<int> layout(Nd,1);
|
||||||
|
std::vector<int> pencil_gd(vgrid->_fdimensions);
|
||||||
|
|
||||||
|
pencil_gd[dim] = G*processors[dim];
|
||||||
|
|
||||||
|
// Pencil global vol LxLxGxLxL per node
|
||||||
|
GridCartesian pencil_g(pencil_gd,layout,processors);
|
||||||
|
|
||||||
|
// Construct pencils
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
typedef typename sobj::scalar_type scalar;
|
||||||
|
|
||||||
|
Lattice<sobj> pgbuf(&pencil_g);
|
||||||
|
|
||||||
|
|
||||||
|
typedef typename FFTW<scalar>::FFTW_scalar FFTW_scalar;
|
||||||
|
typedef typename FFTW<scalar>::FFTW_plan FFTW_plan;
|
||||||
|
|
||||||
|
int Ncomp = sizeof(sobj)/sizeof(scalar);
|
||||||
|
int Nlow = 1;
|
||||||
|
for(int d=0;d<dim;d++){
|
||||||
|
Nlow*=vgrid->_ldimensions[d];
|
||||||
|
}
|
||||||
|
|
||||||
|
int rank = 1; /* 1d transforms */
|
||||||
|
int n[] = {G}; /* 1d transforms of length G */
|
||||||
|
int howmany = Ncomp;
|
||||||
|
int odist,idist,istride,ostride;
|
||||||
|
idist = odist = 1; /* Distance between consecutive FT's */
|
||||||
|
istride = ostride = Ncomp*Nlow; /* distance between two elements in the same FT */
|
||||||
|
int *inembed = n, *onembed = n;
|
||||||
|
|
||||||
|
scalar div;
|
||||||
|
if ( sign == backward ) div = 1.0/G;
|
||||||
|
else if ( sign == forward ) div = 1.0;
|
||||||
|
else assert(0);
|
||||||
|
|
||||||
|
FFTW_plan p;
|
||||||
|
{
|
||||||
|
FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[0];
|
||||||
|
FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[0];
|
||||||
|
p = FFTW<scalar>::fftw_plan_many_dft(rank,n,howmany,
|
||||||
|
in,inembed,
|
||||||
|
istride,idist,
|
||||||
|
out,onembed,
|
||||||
|
ostride, odist,
|
||||||
|
sign,FFTW_ESTIMATE);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Barrel shift and collect global pencil
|
||||||
|
std::vector<int> lcoor(Nd), gcoor(Nd);
|
||||||
|
result = source;
|
||||||
|
for(int p=0;p<processors[dim];p++) {
|
||||||
|
PARALLEL_REGION
|
||||||
|
{
|
||||||
|
std::vector<int> cbuf(Nd);
|
||||||
|
sobj s;
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP_INTERN
|
||||||
|
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||||
|
sgrid->LocalIndexToLocalCoor(idx,cbuf);
|
||||||
|
peekLocalSite(s,result,cbuf);
|
||||||
|
cbuf[dim]+=p*L;
|
||||||
|
pokeLocalSite(s,pgbuf,cbuf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result = Cshift(result,dim,L);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Loop over orthog coords
|
||||||
|
int NN=pencil_g.lSites();
|
||||||
|
GridStopWatch timer;
|
||||||
|
timer.Start();
|
||||||
|
PARALLEL_REGION
|
||||||
|
{
|
||||||
|
std::vector<int> cbuf(Nd);
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP_INTERN
|
||||||
|
for(int idx=0;idx<NN;idx++) {
|
||||||
|
pencil_g.LocalIndexToLocalCoor(idx, cbuf);
|
||||||
|
if ( cbuf[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
|
||||||
|
FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[idx];
|
||||||
|
FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[idx];
|
||||||
|
FFTW<scalar>::fftw_execute_dft(p,in,out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
timer.Stop();
|
||||||
|
|
||||||
|
// performance counting
|
||||||
|
double add,mul,fma;
|
||||||
|
FFTW<scalar>::fftw_flops(p,&add,&mul,&fma);
|
||||||
|
flops_call = add+mul+2.0*fma;
|
||||||
|
usec += timer.useconds();
|
||||||
|
flops+= flops_call*NN;
|
||||||
|
|
||||||
|
// writing out result
|
||||||
|
int pc = processor_coor[dim];
|
||||||
|
PARALLEL_REGION
|
||||||
|
{
|
||||||
|
std::vector<int> clbuf(Nd), cgbuf(Nd);
|
||||||
|
sobj s;
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP_INTERN
|
||||||
|
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||||
|
sgrid->LocalIndexToLocalCoor(idx,clbuf);
|
||||||
|
cgbuf = clbuf;
|
||||||
|
cgbuf[dim] = clbuf[dim]+L*pc;
|
||||||
|
peekLocalSite(s,pgbuf,cgbuf);
|
||||||
|
s = s * div;
|
||||||
|
pokeLocalSite(s,result,clbuf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// destroying plan
|
||||||
|
FFTW<scalar>::fftw_destroy_plan(p);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
71
lib/Grid.h
71
lib/Grid.h
@ -1,3 +1,32 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/Grid.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: azusayamaguchi <ayamaguc@YAMAKAZE.local>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
//
|
//
|
||||||
// Grid.h
|
// Grid.h
|
||||||
// simd
|
// simd
|
||||||
@ -30,25 +59,31 @@
|
|||||||
///////////////////
|
///////////////////
|
||||||
// Grid headers
|
// Grid headers
|
||||||
///////////////////
|
///////////////////
|
||||||
#include <serialisation/Serialisation.h>
|
#include <Grid/serialisation/Serialisation.h>
|
||||||
#include <Config.h>
|
#include "Config.h"
|
||||||
#include <Timer.h>
|
#include <Grid/Timer.h>
|
||||||
#include <Log.h>
|
#include <Grid/PerfCount.h>
|
||||||
#include <AlignedAllocator.h>
|
#include <Grid/Log.h>
|
||||||
#include <Simd.h>
|
#include <Grid/AlignedAllocator.h>
|
||||||
#include <Threads.h>
|
#include <Grid/Simd.h>
|
||||||
#include <Communicator.h>
|
#include <Grid/Threads.h>
|
||||||
#include <Cartesian.h>
|
#include <Grid/Lexicographic.h>
|
||||||
#include <Tensors.h>
|
#include <Grid/Init.h>
|
||||||
#include <Lattice.h>
|
#include <Grid/Communicator.h>
|
||||||
#include <Cshift.h>
|
#include <Grid/Cartesian.h>
|
||||||
#include <Stencil.h>
|
#include <Grid/Tensors.h>
|
||||||
#include <Algorithms.h>
|
#include <Grid/Lattice.h>
|
||||||
#include <qcd/QCD.h>
|
#include <Grid/Cshift.h>
|
||||||
#include <parallelIO/BinaryIO.h>
|
#include <Grid/Stencil.h>
|
||||||
#include <parallelIO/NerscIO.h>
|
#include <Grid/Algorithms.h>
|
||||||
|
#include <Grid/parallelIO/BinaryIO.h>
|
||||||
|
#include <Grid/FFT.h>
|
||||||
|
|
||||||
|
#include <Grid/qcd/QCD.h>
|
||||||
|
#include <Grid/parallelIO/NerscIO.h>
|
||||||
|
#include <Grid/qcd/hmc/NerscCheckpointer.h>
|
||||||
|
#include <Grid/qcd/hmc/HmcRunner.h>
|
||||||
|
|
||||||
#include <Init.h>
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
235
lib/Init.cc
235
lib/Init.cc
@ -1,3 +1,33 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/Init.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <peterboyle@MacBook-Pro.local>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
/****************************************************************************/
|
/****************************************************************************/
|
||||||
/* pab: Signal magic. Processor state dump is x86-64 specific */
|
/* pab: Signal magic. Processor state dump is x86-64 specific */
|
||||||
/****************************************************************************/
|
/****************************************************************************/
|
||||||
@ -14,15 +44,33 @@
|
|||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
#define __X86_64
|
|
||||||
|
|
||||||
#ifdef HAVE_EXECINFO_H
|
#include <fenv.h>
|
||||||
#include <execinfo.h>
|
#ifdef __APPLE__
|
||||||
|
static int
|
||||||
|
feenableexcept (unsigned int excepts)
|
||||||
|
{
|
||||||
|
static fenv_t fenv;
|
||||||
|
unsigned int new_excepts = excepts & FE_ALL_EXCEPT,
|
||||||
|
old_excepts; // previous masks
|
||||||
|
|
||||||
|
if ( fegetenv (&fenv) ) return -1;
|
||||||
|
old_excepts = fenv.__control & FE_ALL_EXCEPT;
|
||||||
|
|
||||||
|
// unmask
|
||||||
|
fenv.__control &= ~new_excepts;
|
||||||
|
fenv.__mxcsr &= ~(new_excepts << 7);
|
||||||
|
|
||||||
|
return ( fesetenv (&fenv) ? -1 : old_excepts );
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
// Convenience functions to access stadard command line arg
|
// Convenience functions to access stadard command line arg
|
||||||
// driven parallelism controls
|
// driven parallelism controls
|
||||||
@ -99,6 +147,13 @@ void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GridCmdOptionInt(std::string &str,int & val)
|
||||||
|
{
|
||||||
|
std::stringstream ss(str);
|
||||||
|
ss>>val;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void GridParseLayout(char **argv,int argc,
|
void GridParseLayout(char **argv,int argc,
|
||||||
std::vector<int> &latt,
|
std::vector<int> &latt,
|
||||||
@ -120,18 +175,21 @@ void GridParseLayout(char **argv,int argc,
|
|||||||
}
|
}
|
||||||
if( GridCmdOptionExists(argv,argv+argc,"--threads") ){
|
if( GridCmdOptionExists(argv,argv+argc,"--threads") ){
|
||||||
std::vector<int> ompthreads(0);
|
std::vector<int> ompthreads(0);
|
||||||
|
#ifndef GRID_OMP
|
||||||
|
std::cout << GridLogWarning << "'--threads' option used but Grid was"
|
||||||
|
<< " not compiled with thread support" << std::endl;
|
||||||
|
#endif
|
||||||
arg= GridCmdOptionPayload(argv,argv+argc,"--threads");
|
arg= GridCmdOptionPayload(argv,argv+argc,"--threads");
|
||||||
GridCmdOptionIntVector(arg,ompthreads);
|
GridCmdOptionIntVector(arg,ompthreads);
|
||||||
assert(ompthreads.size()==1);
|
assert(ompthreads.size()==1);
|
||||||
GridThread::SetThreads(ompthreads[0]);
|
GridThread::SetThreads(ompthreads[0]);
|
||||||
}
|
}
|
||||||
if( GridCmdOptionExists(argv,argv+argc,"--cores") ){
|
if( GridCmdOptionExists(argv,argv+argc,"--cores") ){
|
||||||
std::vector<int> cores(0);
|
int cores;
|
||||||
arg= GridCmdOptionPayload(argv,argv+argc,"--cores");
|
arg= GridCmdOptionPayload(argv,argv+argc,"--cores");
|
||||||
GridCmdOptionIntVector(arg,cores);
|
GridCmdOptionInt(arg,cores);
|
||||||
GridThread::SetCores(cores[0]);
|
GridThread::SetCores(cores);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GridCmdVectorIntToString(const std::vector<int> & vec){
|
std::string GridCmdVectorIntToString(const std::vector<int> & vec){
|
||||||
@ -140,33 +198,40 @@ std::string GridCmdVectorIntToString(const std::vector<int> & vec){
|
|||||||
return oss.str();
|
return oss.str();
|
||||||
}
|
}
|
||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
//
|
// Reinit guard
|
||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
|
static int Grid_is_initialised = 0;
|
||||||
|
|
||||||
|
|
||||||
void Grid_init(int *argc,char ***argv)
|
void Grid_init(int *argc,char ***argv)
|
||||||
{
|
{
|
||||||
#ifdef GRID_COMMS_MPI
|
|
||||||
MPI_Init(argc,argv);
|
|
||||||
#endif
|
|
||||||
// Parse command line args.
|
|
||||||
|
|
||||||
GridLogger::StopWatch.Start();
|
GridLogger::StopWatch.Start();
|
||||||
|
|
||||||
std::string arg;
|
std::string arg;
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Shared memory block size
|
||||||
|
////////////////////////////////////
|
||||||
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--shm") ){
|
||||||
|
int MB;
|
||||||
|
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--shm");
|
||||||
|
GridCmdOptionInt(arg,MB);
|
||||||
|
CartesianCommunicator::MAX_MPI_SHM_BYTES = MB*1024*1024;
|
||||||
|
}
|
||||||
|
|
||||||
|
CartesianCommunicator::Init(argc,argv);
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Logging
|
||||||
|
////////////////////////////////////
|
||||||
|
|
||||||
std::vector<std::string> logstreams;
|
std::vector<std::string> logstreams;
|
||||||
std::string defaultLog("Error,Warning,Message,Performance");
|
std::string defaultLog("Error,Warning,Message,Performance");
|
||||||
|
|
||||||
GridCmdOptionCSL(defaultLog,logstreams);
|
GridCmdOptionCSL(defaultLog,logstreams);
|
||||||
GridLogConfigure(logstreams);
|
GridLogConfigure(logstreams);
|
||||||
|
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--help") ){
|
if( !GridCmdOptionExists(*argv,*argv+*argc,"--debug-stdout") ){
|
||||||
std::cout<<GridLogMessage<<"--help : this message"<<std::endl;
|
Grid_quiesce_nodes();
|
||||||
std::cout<<GridLogMessage<<"--debug-signals : catch sigsegv and print a blame report"<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"--debug-stdout : print stdout from EVERY node"<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"--decomposition : report on default omp,mpi and simd decomposition"<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"--mpi n.n.n.n : default MPI decomposition"<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"--omp n : default number of OMP threads"<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"--grid n.n.n.n : default Grid size"<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"--log list : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Debug"<<std::endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--log") ){
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--log") ){
|
||||||
@ -175,28 +240,118 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
GridLogConfigure(logstreams);
|
GridLogConfigure(logstreams);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Help message
|
||||||
|
////////////////////////////////////
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--help") ){
|
||||||
|
std::cout<<GridLogMessage<<" --help : this message"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"Geometry:"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --mpi n.n.n.n : default MPI decomposition"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --threads n : default number of OMP threads"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --grid n.n.n.n : default Grid size"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --shm M : allocate M megabytes of shared memory for comms"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"Verbose and debug:"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --log list : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Integrator,Debug,Colours"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --decomposition : report on default omp,mpi and simd decomposition"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --debug-signals : catch sigsegv and print a blame report"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --debug-stdout : print stdout from EVERY node"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --notimestamp : suppress millisecond resolution stamps"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"Performance:"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --dslash-generic: Wilson kernel for generic Nc"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --dslash-unroll : Wilson kernel for Nc=3"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --dslash-asm : Wilson kernel for AVX512"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --lebesgue : Cache oblivious Lebesgue curve/Morton order/Z-graph stencil looping"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --cacheblocking n.m.o.p : Hypercuboidal cache blocking"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<std::endl;
|
||||||
|
exit(EXIT_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Banner
|
||||||
|
////////////////////////////////////
|
||||||
|
|
||||||
|
std::string COL_RED = GridLogColours.colour["RED"];
|
||||||
|
std::string COL_PURPLE = GridLogColours.colour["PURPLE"];
|
||||||
|
std::string COL_BLACK = GridLogColours.colour["BLACK"];
|
||||||
|
std::string COL_GREEN = GridLogColours.colour["GREEN"];
|
||||||
|
std::string COL_BLUE = GridLogColours.colour["BLUE"];
|
||||||
|
std::string COL_YELLOW = GridLogColours.colour["YELLOW"];
|
||||||
|
std::string COL_BACKGROUND = GridLogColours.colour["NORMAL"];
|
||||||
|
|
||||||
|
std::cout <<std::endl;
|
||||||
|
std::cout <<COL_RED << "__|__|__|__|__"<< "|__|__|_"<<COL_PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
||||||
|
std::cout <<COL_RED << "__|__|__|__|__"<< "|__|__|_"<<COL_PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
||||||
|
std::cout <<COL_RED << "__|_ | | | "<< "| | | "<<COL_PURPLE<<" | | |"<< " | | | _|__"<<std::endl;
|
||||||
|
std::cout <<COL_RED << "__|_ "<< " "<<COL_PURPLE<<" "<< " _|__"<<std::endl;
|
||||||
|
std::cout <<COL_RED << "__|_ "<<COL_GREEN<<" GGGG "<<COL_RED<<" RRRR "<<COL_BLUE <<" III "<<COL_PURPLE<<"DDDD "<<COL_PURPLE<<" _|__"<<std::endl;
|
||||||
|
std::cout <<COL_RED << "__|_ "<<COL_GREEN<<"G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D "<<COL_PURPLE<<" _|__"<<std::endl;
|
||||||
|
std::cout <<COL_RED << "__|_ "<<COL_GREEN<<"G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D"<<COL_PURPLE<<" _|__"<<std::endl;
|
||||||
|
std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<"G GG "<<COL_RED<<" RRRR "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D"<<COL_GREEN <<" _|__"<<std::endl;
|
||||||
|
std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<"G G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D "<<COL_GREEN <<" _|__"<<std::endl;
|
||||||
|
std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<" GGGG "<<COL_RED<<" R R "<<COL_BLUE <<" III "<<COL_PURPLE<<"DDDD "<<COL_GREEN <<" _|__"<<std::endl;
|
||||||
|
std::cout <<COL_BLUE << "__|_ "<< " "<<COL_GREEN <<" "<< " _|__"<<std::endl;
|
||||||
|
std::cout <<COL_BLUE << "__|__|__|__|__"<< "|__|__|_"<<COL_GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
||||||
|
std::cout <<COL_BLUE << "__|__|__|__|__"<< "|__|__|_"<<COL_GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
||||||
|
std::cout <<COL_BLUE << " | | | | "<< "| | | "<<COL_GREEN <<" | | |"<< " | | | | "<<std::endl;
|
||||||
|
std::cout << std::endl;
|
||||||
|
std::cout << std::endl;
|
||||||
|
std::cout <<COL_YELLOW<< std::endl;
|
||||||
|
std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl;
|
||||||
|
std::cout << std::endl;
|
||||||
|
std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl;
|
||||||
|
std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl;
|
||||||
|
std::cout << "the Free Software Foundation; either version 2 of the License, or"<<std::endl;
|
||||||
|
std::cout << "(at your option) any later version."<<std::endl;
|
||||||
|
std::cout << std::endl;
|
||||||
|
std::cout << "This program is distributed in the hope that it will be useful,"<<std::endl;
|
||||||
|
std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl;
|
||||||
|
std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"<<std::endl;
|
||||||
|
std::cout << "GNU General Public License for more details."<<std::endl;
|
||||||
|
std::cout << COL_BACKGROUND <<std::endl;
|
||||||
|
std::cout << std::endl;
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Debug and performance options
|
||||||
|
////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
|
||||||
Grid_debug_handler_init();
|
Grid_debug_handler_init();
|
||||||
}
|
}
|
||||||
if( !GridCmdOptionExists(*argv,*argv+*argc,"--debug-stdout") ){
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-unroll") ){
|
||||||
Grid_quiesce_nodes();
|
QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptHandUnroll;
|
||||||
}
|
}
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-opt") ){
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-asm") ){
|
||||||
QCD::WilsonFermionStatic::HandOptDslash=1;
|
QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptInlineAsm;
|
||||||
QCD::WilsonFermion5DStatic::HandOptDslash=1;
|
}
|
||||||
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-generic") ){
|
||||||
|
QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptGeneric;
|
||||||
}
|
}
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
|
||||||
LebesgueOrder::UseLebesgueOrder=1;
|
LebesgueOrder::UseLebesgueOrder=1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){
|
||||||
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking");
|
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking");
|
||||||
GridCmdOptionIntVector(arg,LebesgueOrder::Block);
|
GridCmdOptionIntVector(arg,LebesgueOrder::Block);
|
||||||
}
|
}
|
||||||
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--notimestamp") ){
|
||||||
|
GridLogTimestamp(0);
|
||||||
|
} else {
|
||||||
|
GridLogTimestamp(1);
|
||||||
|
}
|
||||||
|
|
||||||
GridParseLayout(*argv,*argc,
|
GridParseLayout(*argv,*argc,
|
||||||
Grid_default_latt,
|
Grid_default_latt,
|
||||||
Grid_default_mpi);
|
Grid_default_mpi);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Requesting "<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl;
|
||||||
|
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){
|
||||||
std::cout<<GridLogMessage<<"Grid Decomposition\n";
|
std::cout<<GridLogMessage<<"Grid Decomposition\n";
|
||||||
std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl;
|
std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl;
|
||||||
@ -208,23 +363,18 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Grid_is_initialised = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void Grid_finalize(void)
|
void Grid_finalize(void)
|
||||||
{
|
{
|
||||||
#ifdef GRID_COMMS_MPI
|
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3)
|
||||||
MPI_Finalize();
|
MPI_Finalize();
|
||||||
Grid_unquiesce_nodes();
|
Grid_unquiesce_nodes();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
double usecond(void) {
|
|
||||||
struct timeval tv;
|
|
||||||
gettimeofday(&tv,NULL);
|
|
||||||
return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define _NBACKTRACE (256)
|
|
||||||
void * Grid_backtrace_buffer[_NBACKTRACE];
|
void * Grid_backtrace_buffer[_NBACKTRACE];
|
||||||
|
|
||||||
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
|
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
|
||||||
@ -236,11 +386,11 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
|
|||||||
// Linux/Posix
|
// Linux/Posix
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
// And x86 64bit
|
// And x86 64bit
|
||||||
ucontext_t * uc= (ucontext_t *)ptr;
|
#ifdef __x86_64__
|
||||||
|
ucontext_t * uc= (ucontext_t *)ptr;
|
||||||
struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext;
|
struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext;
|
||||||
printf(" instruction %llx\n",(unsigned long long)sc->rip);
|
printf(" instruction %llx\n",(unsigned long long)sc->rip);
|
||||||
#define REG(A) printf(" %s %lx\n",#A,sc-> A);
|
#define REG(A) printf(" %s %lx\n",#A,sc-> A);
|
||||||
|
|
||||||
REG(rdi);
|
REG(rdi);
|
||||||
REG(rsi);
|
REG(rsi);
|
||||||
REG(rbp);
|
REG(rbp);
|
||||||
@ -261,13 +411,8 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
|
|||||||
REG(r14);
|
REG(r14);
|
||||||
REG(r15);
|
REG(r15);
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAVE_EXECINFO_H
|
|
||||||
int symbols = backtrace (Grid_backtrace_buffer,_NBACKTRACE);
|
|
||||||
char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);
|
|
||||||
for (int i = 0; i < symbols; i++){
|
|
||||||
printf ("%s\n", strings[i]);
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
BACKTRACE();
|
||||||
exit(0);
|
exit(0);
|
||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
@ -280,5 +425,9 @@ void Grid_debug_handler_init(void)
|
|||||||
sa.sa_flags = SA_SIGINFO;
|
sa.sa_flags = SA_SIGINFO;
|
||||||
sigaction(SIGSEGV,&sa,NULL);
|
sigaction(SIGSEGV,&sa,NULL);
|
||||||
sigaction(SIGTRAP,&sa,NULL);
|
sigaction(SIGTRAP,&sa,NULL);
|
||||||
|
|
||||||
|
feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
|
||||||
|
|
||||||
|
sigaction(SIGFPE,&sa,NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
33
lib/Init.h
33
lib/Init.h
@ -1,3 +1,31 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/Init.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_INIT_H
|
#ifndef GRID_INIT_H
|
||||||
#define GRID_INIT_H
|
#define GRID_INIT_H
|
||||||
|
|
||||||
@ -5,6 +33,7 @@ namespace Grid {
|
|||||||
|
|
||||||
void Grid_init(int *argc,char ***argv);
|
void Grid_init(int *argc,char ***argv);
|
||||||
void Grid_finalize(void);
|
void Grid_finalize(void);
|
||||||
|
|
||||||
// internal, controled with --handle
|
// internal, controled with --handle
|
||||||
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr);
|
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr);
|
||||||
void Grid_debug_handler_init(void);
|
void Grid_debug_handler_init(void);
|
||||||
@ -16,11 +45,15 @@ namespace Grid {
|
|||||||
const std::vector<int> &GridDefaultMpi(void);
|
const std::vector<int> &GridDefaultMpi(void);
|
||||||
const int &GridThreads(void) ;
|
const int &GridThreads(void) ;
|
||||||
void GridSetThreads(int t) ;
|
void GridSetThreads(int t) ;
|
||||||
|
void GridLogTimestamp(int);
|
||||||
|
|
||||||
// Common parsing chores
|
// Common parsing chores
|
||||||
std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option);
|
std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option);
|
||||||
bool GridCmdOptionExists(char** begin, char** end, const std::string& option);
|
bool GridCmdOptionExists(char** begin, char** end, const std::string& option);
|
||||||
std::string GridCmdVectorIntToString(const std::vector<int> & vec);
|
std::string GridCmdVectorIntToString(const std::vector<int> & vec);
|
||||||
|
void GridCmdOptionCSL(std::string str,std::vector<std::string> & vec);
|
||||||
|
void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec);
|
||||||
|
|
||||||
|
|
||||||
void GridParseLayout(char **argv,int argc,
|
void GridParseLayout(char **argv,int argc,
|
||||||
std::vector<int> &latt,
|
std::vector<int> &latt,
|
||||||
|
@ -1,6 +1,33 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/Lattice.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_LATTICE_H
|
#ifndef GRID_LATTICE_H
|
||||||
#define GRID_LATTICE_H
|
#define GRID_LATTICE_H
|
||||||
|
|
||||||
#include <lattice/Lattice_base.h>
|
#include <Grid/lattice/Lattice_base.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
32
lib/Lexicographic.h
Normal file
32
lib/Lexicographic.h
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
#ifndef GRID_LEXICOGRAPHIC_H
|
||||||
|
#define GRID_LEXICOGRAPHIC_H
|
||||||
|
|
||||||
|
|
||||||
|
namespace Grid{
|
||||||
|
|
||||||
|
class Lexicographic {
|
||||||
|
public:
|
||||||
|
|
||||||
|
static inline void CoorFromIndex (std::vector<int>& coor,int index,std::vector<int> &dims){
|
||||||
|
int nd= dims.size();
|
||||||
|
coor.resize(nd);
|
||||||
|
for(int d=0;d<nd;d++){
|
||||||
|
coor[d] = index % dims[d];
|
||||||
|
index = index / dims[d];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void IndexFromCoor (std::vector<int>& coor,int &index,std::vector<int> &dims){
|
||||||
|
int nd=dims.size();
|
||||||
|
int stride=1;
|
||||||
|
index=0;
|
||||||
|
for(int d=0;d<nd;d++){
|
||||||
|
index = index+stride*coor[d];
|
||||||
|
stride=stride*dims[d];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
120
lib/Log.cc
120
lib/Log.cc
@ -1,62 +1,112 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/Log.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
|
|
||||||
|
#include <cxxabi.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
std::string demangle(const char* name) {
|
||||||
|
|
||||||
|
int status = -4; // some arbitrary value to eliminate the compiler warning
|
||||||
|
|
||||||
|
// enable c++11 by passing the flag -std=c++11 to g++
|
||||||
|
std::unique_ptr<char, void(*)(void*)> res {
|
||||||
|
abi::__cxa_demangle(name, NULL, NULL, &status),
|
||||||
|
std::free
|
||||||
|
};
|
||||||
|
|
||||||
|
return (status==0) ? res.get() : name ;
|
||||||
|
}
|
||||||
|
|
||||||
GridStopWatch Logger::StopWatch;
|
GridStopWatch Logger::StopWatch;
|
||||||
std::ostream Logger::devnull(0);
|
int Logger::timestamp;
|
||||||
|
std::ostream Logger::devnull(0);
|
||||||
|
|
||||||
GridLogger GridLogError (1,"Error");
|
void GridLogTimestamp(int on){
|
||||||
GridLogger GridLogWarning (1,"Warning");
|
Logger::Timestamp(on);
|
||||||
GridLogger GridLogMessage (1,"Message");
|
}
|
||||||
GridLogger GridLogDebug (1,"Debug");
|
|
||||||
GridLogger GridLogPerformance(1,"Performance");
|
|
||||||
GridLogger GridLogIterative (1,"Iterative");
|
|
||||||
|
|
||||||
void GridLogConfigure(std::vector<std::string> &logstreams)
|
Colours GridLogColours(0);
|
||||||
{
|
GridLogger GridLogError(1, "Error", GridLogColours, "RED");
|
||||||
|
GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW");
|
||||||
|
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL");
|
||||||
|
GridLogger GridLogDebug(1, "Debug", GridLogColours, "PURPLE");
|
||||||
|
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
|
||||||
|
GridLogger GridLogIterative(1, "Iterative", GridLogColours, "BLUE");
|
||||||
|
GridLogger GridLogIntegrator(1, "Integrator", GridLogColours, "BLUE");
|
||||||
|
|
||||||
|
void GridLogConfigure(std::vector<std::string> &logstreams) {
|
||||||
GridLogError.Active(0);
|
GridLogError.Active(0);
|
||||||
GridLogWarning.Active(0);
|
GridLogWarning.Active(0);
|
||||||
GridLogMessage.Active(0);
|
GridLogMessage.Active(1); // at least the messages should be always on
|
||||||
GridLogIterative.Active(0);
|
GridLogIterative.Active(0);
|
||||||
GridLogDebug.Active(0);
|
GridLogDebug.Active(0);
|
||||||
GridLogPerformance.Active(0);
|
GridLogPerformance.Active(0);
|
||||||
|
GridLogIntegrator.Active(0);
|
||||||
|
GridLogColours.Active(0);
|
||||||
|
|
||||||
for(int i=0;i<logstreams.size();i++){
|
for (int i = 0; i < logstreams.size(); i++) {
|
||||||
if ( logstreams[i]== std::string("Error") ) GridLogError.Active(1);
|
if (logstreams[i] == std::string("Error")) GridLogError.Active(1);
|
||||||
if ( logstreams[i]== std::string("Warning") ) GridLogWarning.Active(1);
|
if (logstreams[i] == std::string("Warning")) GridLogWarning.Active(1);
|
||||||
if ( logstreams[i]== std::string("Message") ) GridLogMessage.Active(1);
|
if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0);
|
||||||
if ( logstreams[i]== std::string("Iterative") ) GridLogIterative.Active(1);
|
if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1);
|
||||||
if ( logstreams[i]== std::string("Debug") ) GridLogDebug.Active(1);
|
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
|
||||||
if ( logstreams[i]== std::string("Performance") ) GridLogPerformance.Active(1);
|
if (logstreams[i] == std::string("Performance"))
|
||||||
|
GridLogPerformance.Active(1);
|
||||||
|
if (logstreams[i] == std::string("Integrator")) GridLogIntegrator.Active(1);
|
||||||
|
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
// Verbose limiter on MPI tasks
|
// Verbose limiter on MPI tasks
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
void Grid_quiesce_nodes(void)
|
void Grid_quiesce_nodes(void) {
|
||||||
{
|
int me = 0;
|
||||||
#ifdef GRID_COMMS_MPI
|
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPI3L)
|
||||||
int me;
|
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD,&me);
|
#endif
|
||||||
if ( me ) {
|
#ifdef GRID_COMMS_SHMEM
|
||||||
|
me = shmem_my_pe();
|
||||||
|
#endif
|
||||||
|
if (me) {
|
||||||
std::cout.setstate(std::ios::badbit);
|
std::cout.setstate(std::ios::badbit);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Grid_unquiesce_nodes(void)
|
void Grid_unquiesce_nodes(void) {
|
||||||
{
|
|
||||||
#ifdef GRID_COMMS_MPI
|
#ifdef GRID_COMMS_MPI
|
||||||
std::cout.clear();
|
std::cout.clear();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
std::ostream& operator<< (std::ostream& stream, const GridTime& time)
|
|
||||||
{
|
|
||||||
stream << time.count()<<" ms";
|
|
||||||
return stream;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
172
lib/Log.h
172
lib/Log.h
@ -1,44 +1,136 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/Log.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
|
||||||
#ifndef GRID_LOG_H
|
#ifndef GRID_LOG_H
|
||||||
#define GRID_LOG_H
|
#define GRID_LOG_H
|
||||||
|
|
||||||
|
#ifdef HAVE_EXECINFO_H
|
||||||
|
#include <cstdlib>
#include <execinfo.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Dress the output; use std::chrono for time stamping via the StopWatch class
|
// Dress the output; use std::chrono for time stamping via the StopWatch class
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
|
// Maps colour names ("BLACK", "RED", ... "NORMAL") to the terminal escape
// sequence that selects them. While inactive every known name maps to the
// empty string, so callers can stream colour codes unconditionally.
class Colours{
protected:
  bool is_active;
public:
  std::map<std::string, std::string> colour;

  // Builds the table; colouring is off unless activate is true.
  Colours(bool activate=false){
    Active(activate);
  }

  // Switches colouring on or off and rewrites all nine table entries.
  void Active(bool activate){
    // name / escape-sequence pairs used when colouring is enabled
    static const char * const palette[][2] = {
      {"BLACK" , "\033[30m"  },
      {"RED"   , "\033[31m"  },
      {"GREEN" , "\033[32m"  },
      {"YELLOW", "\033[33m"  },
      {"BLUE"  , "\033[34m"  },
      {"PURPLE", "\033[35m"  },
      {"CYAN"  , "\033[36m"  },
      {"WHITE" , "\033[37m"  },
      {"NORMAL", "\033[0;39m"}
    };

    is_active = activate;
    for (const auto &entry : palette) {
      colour[entry[0]] = is_active ? entry[1] : "";
    }
  }
};
|
||||||
|
|
||||||
std::ostream& operator<< (std::ostream& stream, const GridTime& time);
|
|
||||||
|
|
||||||
class Logger {
|
class Logger {
|
||||||
protected:
|
protected:
|
||||||
int active;
|
Colours &Painter;
|
||||||
std::string name, topName;
|
int active;
|
||||||
|
static int timestamp;
|
||||||
|
std::string name, topName;
|
||||||
|
std::string COLOUR;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static GridStopWatch StopWatch;
|
static GridStopWatch StopWatch;
|
||||||
static std::ostream devnull;
|
static std::ostream devnull;
|
||||||
|
|
||||||
Logger(std::string topNm, int on, std::string nm)
|
std::string background() {return Painter.colour["NORMAL"];}
|
||||||
: active(on), name(nm), topName(topNm) {};
|
std::string evidence() {return Painter.colour["YELLOW"];}
|
||||||
|
std::string colour() {return Painter.colour[COLOUR];}
|
||||||
|
|
||||||
void Active(int on) {active = on;};
|
Logger(std::string topNm, int on, std::string nm, Colours& col_class, std::string col) : active(on),
|
||||||
int isActive(void) {return active;};
|
name(nm),
|
||||||
|
topName(topNm),
|
||||||
|
Painter(col_class),
|
||||||
|
COLOUR(col) {} ;
|
||||||
|
|
||||||
friend std::ostream& operator<< (std::ostream& stream, const Logger& log){
|
void Active(int on) {active = on;};
|
||||||
if ( log.active ) {
|
int isActive(void) {return active;};
|
||||||
StopWatch.Stop();
|
static void Timestamp(int on) {timestamp = on;};
|
||||||
GridTime now = StopWatch.Elapsed();
|
|
||||||
StopWatch.Start();
|
friend std::ostream& operator<< (std::ostream& stream, Logger& log){
|
||||||
stream << std::setw(8) << std::left << log.topName << " : ";
|
|
||||||
stream << std::setw(12) << std::left << log.name << " : ";
|
if ( log.active ) {
|
||||||
stream << now << " : ";
|
stream << log.background()<< log.topName << log.background()<< " : ";
|
||||||
return stream;
|
stream << log.colour() <<std::setw(14) << std::left << log.name << log.background() << " : ";
|
||||||
} else {
|
if ( log.timestamp ) {
|
||||||
return devnull;
|
StopWatch.Stop();
|
||||||
}
|
GridTime now = StopWatch.Elapsed();
|
||||||
|
StopWatch.Start();
|
||||||
|
stream << log.evidence()<< now << log.background() << " : " ;
|
||||||
|
}
|
||||||
|
stream << log.colour();
|
||||||
|
return stream;
|
||||||
|
} else {
|
||||||
|
return devnull;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class GridLogger: public Logger {
|
class GridLogger: public Logger {
|
||||||
public:
|
public:
|
||||||
GridLogger(int on, std::string nm): Logger("Grid", on, nm){};
|
GridLogger(int on, std::string nm, Colours&col_class, std::string col_key = "NORMAL"):
|
||||||
|
Logger("Grid", on, nm, col_class, col_key){};
|
||||||
};
|
};
|
||||||
|
|
||||||
void GridLogConfigure(std::vector<std::string> &logstreams);
|
void GridLogConfigure(std::vector<std::string> &logstreams);
|
||||||
@ -49,6 +141,42 @@ extern GridLogger GridLogMessage;
|
|||||||
extern GridLogger GridLogDebug ;
|
extern GridLogger GridLogDebug ;
|
||||||
extern GridLogger GridLogPerformance;
|
extern GridLogger GridLogPerformance;
|
||||||
extern GridLogger GridLogIterative ;
|
extern GridLogger GridLogIterative ;
|
||||||
|
extern GridLogger GridLogIntegrator ;
|
||||||
|
extern Colours GridLogColours;
|
||||||
|
|
||||||
|
std::string demangle(const char* name) ;
|
||||||
|
|
||||||
|
// Capacity (in frames) of Grid_backtrace_buffer; passed as the size argument
// to backtrace() by the BACKTRACEFP macro.
#define _NBACKTRACE (256)
|
||||||
|
// Buffer that receives the frame addresses captured by backtrace().
// Only declared here; the definition lives in another translation unit.
extern void * Grid_backtrace_buffer[_NBACKTRACE];
|
||||||
|
|
||||||
|
// Write a backtrace to the file "backtrace.<rank>" in the current directory,
// where <rank> is CartesianCommunicator::RankWorld() formatted with %d.
// The name buffer is large enough for any int and is filled with snprintf,
// so it cannot be overrun; if the file cannot be opened nothing is written.
// The frames themselves are produced by BACKTRACEFP(fp).
#define BACKTRACEFILE() {\
  char Grid_bt_fname[32]; \
  std::snprintf(Grid_bt_fname,sizeof(Grid_bt_fname),"backtrace.%d",CartesianCommunicator::RankWorld()); \
  std::FILE * fp = std::fopen(Grid_bt_fname,"w"); \
  if ( fp != NULL ) { \
    BACKTRACEFP(fp)\
    std::fclose(fp); \
  } \
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_EXECINFO_H
// Print one line per captured stack frame to the stdio stream fp, passing
// each symbol string through demangle(). At most _NBACKTRACE frames are
// captured into Grid_backtrace_buffer.
// backtrace_symbols() hands back a single malloc'ed array that the caller
// owns, so it is released with free() once printed; if it returns NULL
// nothing is printed.
#define BACKTRACEFP(fp) { \
  int symbols    = backtrace        (Grid_backtrace_buffer,_NBACKTRACE);\
  char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);\
  if ( strings != NULL ) { \
    for (int i = 0; i < symbols; i++){\
      std::fprintf (fp,"BackTrace Strings: %d %s\n",i, demangle(strings[i]).c_str()); std::fflush(fp); \
    }\
    std::free(strings); \
  } \
}
#else
// Fallback when <execinfo.h> is unavailable: print the return addresses of
// frame levels 0..3 to fp. The addresses are pointers, so they are printed
// with %p.
// NOTE(review): the GCC manual warns that __builtin_return_address with a
// non-zero level can have unpredictable effects - confirm this is acceptable
// on the targets that take this branch.
#define BACKTRACEFP(fp) { \
  std::fprintf (fp,"BT %d %p\n",0, __builtin_return_address(0)); std::fflush(fp); \
  std::fprintf (fp,"BT %d %p\n",1, __builtin_return_address(1)); std::fflush(fp); \
  std::fprintf (fp,"BT %d %p\n",2, __builtin_return_address(2)); std::fflush(fp); \
  std::fprintf (fp,"BT %d %p\n",3, __builtin_return_address(3)); std::fflush(fp); \
}
#endif

// Backtrace to standard output.
#define BACKTRACE() BACKTRACEFP(stdout)
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,4 +0,0 @@
|
|||||||
|
|
||||||
HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/AdefGeneric.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/DenseMatrix.h ./algorithms/iterative/EigenSort.h ./algorithms/iterative/Francis.h ./algorithms/iterative/Householder.h ./algorithms/iterative/ImplicitlyRestartedLanczos.h ./algorithms/iterative/Matrix.h ./algorithms/iterative/MatrixUtils.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/PrecConjugateResidual.h ./algorithms/iterative/PrecGeneralisedConjugateResidual.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/Preconditioner.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./Init.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./Log.h ./Old/Tensor_peek.h ./Old/Tensor_poke.h ./parallelIO/BinaryIO.h ./parallelIO/NerscIO.h ./PerfCount.h ./pugixml/pugixml.h ./qcd/action/ActionBase.h ./qcd/action/ActionParams.h ./qcd/action/Actions.h 
./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/FermionOperatorImpl.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/pseudofermion/EvenOddSchurDifferentiable.h ./qcd/action/pseudofermion/OneFlavourEvenOddRational.h ./qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h ./qcd/action/pseudofermion/OneFlavourRational.h ./qcd/action/pseudofermion/OneFlavourRationalRatio.h ./qcd/action/pseudofermion/TwoFlavour.h ./qcd/action/pseudofermion/TwoFlavourEvenOdd.h ./qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h ./qcd/action/pseudofermion/TwoFlavourRatio.h ./qcd/hmc/HMC.h ./qcd/hmc/integrators/Integrator.h ./qcd/hmc/integrators/Integrator_algorithm.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/SUn.h ./qcd/utils/WilsonLoops.h ./serialisation/BaseIO.h ./serialisation/BinaryIO.h ./serialisation/MacroMagic.h ./serialisation/Serialisation.h ./serialisation/TextIO.h ./serialisation/XmlIO.h ./simd/Avx512Asm.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_empty.h 
./simd/Grid_imci.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_determinant.h ./tensors/Tensor_exp.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_index.h ./tensors/Tensor_inner.h ./tensors/Tensor_logical.h ./tensors/Tensor_outer.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h ./Timer.h
|
|
||||||
|
|
||||||
CCFILES=./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./Init.cc ./Log.cc ./PerfCount.cc ./pugixml/pugixml.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsAsm.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/hmc/HMC.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./serialisation/BinaryIO.cc ./serialisation/TextIO.cc ./serialisation/XmlIO.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
|
|
@ -1,28 +1,37 @@
|
|||||||
# additional include paths necessary to compile the C++ library
|
|
||||||
AM_CXXFLAGS = -I$(top_srcdir)/
|
|
||||||
|
|
||||||
extra_sources=
|
extra_sources=
|
||||||
if BUILD_COMMS_MPI
|
if BUILD_COMMS_MPI
|
||||||
extra_sources+=communicator/Communicator_mpi.cc
|
extra_sources+=communicator/Communicator_mpi.cc
|
||||||
|
extra_sources+=communicator/Communicator_base.cc
|
||||||
|
endif
|
||||||
|
|
||||||
|
if BUILD_COMMS_MPI3
|
||||||
|
extra_sources+=communicator/Communicator_mpi3.cc
|
||||||
|
extra_sources+=communicator/Communicator_base.cc
|
||||||
|
endif
|
||||||
|
|
||||||
|
if BUILD_COMMS_MPI3L
|
||||||
|
extra_sources+=communicator/Communicator_mpi3_leader.cc
|
||||||
|
extra_sources+=communicator/Communicator_base.cc
|
||||||
|
endif
|
||||||
|
|
||||||
|
if BUILD_COMMS_SHMEM
|
||||||
|
extra_sources+=communicator/Communicator_shmem.cc
|
||||||
|
extra_sources+=communicator/Communicator_base.cc
|
||||||
endif
|
endif
|
||||||
|
|
||||||
if BUILD_COMMS_NONE
|
if BUILD_COMMS_NONE
|
||||||
extra_sources+=communicator/Communicator_none.cc
|
extra_sources+=communicator/Communicator_none.cc
|
||||||
|
extra_sources+=communicator/Communicator_base.cc
|
||||||
endif
|
endif
|
||||||
|
|
||||||
#
|
#
|
||||||
# Libraries
|
# Libraries
|
||||||
#
|
#
|
||||||
|
|
||||||
include Make.inc
|
include Make.inc
|
||||||
|
include Eigen.inc
|
||||||
|
|
||||||
lib_LIBRARIES = libGrid.a
|
lib_LIBRARIES = libGrid.a
|
||||||
libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
|
|
||||||
|
|
||||||
|
|
||||||
# qcd/action/fermion/PartialFractionFermion5D.cc\ \
|
|
||||||
#
|
|
||||||
# Include files
|
|
||||||
#
|
|
||||||
nobase_include_HEADERS=$(HFILES)
|
|
||||||
|
|
||||||
|
libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
|
||||||
|
libGrid_adir = $(pkgincludedir)
|
||||||
|
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h
|
||||||
|
BIN
lib/Old/Endeavour.tgz
Normal file
BIN
lib/Old/Endeavour.tgz
Normal file
Binary file not shown.
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/Old/Tensor_peek.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_MATH_PEEK_H
|
#ifndef GRID_MATH_PEEK_H
|
||||||
#define GRID_MATH_PEEK_H
|
#define GRID_MATH_PEEK_H
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/Old/Tensor_poke.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_MATH_POKE_H
|
#ifndef GRID_MATH_POKE_H
|
||||||
#define GRID_MATH_POKE_H
|
#define GRID_MATH_POKE_H
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/PerfCount.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
#include <PerfCount.h>
|
#include <PerfCount.h>
|
||||||
@ -5,28 +32,44 @@
|
|||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
#define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16))
|
#define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16))
|
||||||
|
#define RawConfig(A,B) (A<<8|B)
|
||||||
const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = {
|
const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = {
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES , "CPUCYCLES.........." },
|
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES , "CACHE_REFERENCES..." , INSTRUCTIONS},
|
||||||
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS , "INSTRUCTIONS......." },
|
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES , "CACHE_MISSES......." , CACHE_REFERENCES},
|
||||||
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES , "CACHE_REFERENCES..." },
|
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES , "CPUCYCLES.........." , INSTRUCTIONS},
|
||||||
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES , "CACHE_MISSES......." },
|
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS , "INSTRUCTIONS......." , CPUCYCLES },
|
||||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,MISS) , "L1D_READ_MISS......"},
|
// 4
|
||||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,ACCESS) , "L1D_READ_ACCESS...."},
|
#ifdef AVX512
|
||||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,MISS) , "L1D_WRITE_MISS....."},
|
{ PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", CPUCYCLES },
|
||||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,ACCESS) , "L1D_WRITE_ACCESS..."},
|
{ PERF_TYPE_RAW, RawConfig(0x01,0x04), "L1_MISS_LOADS......", L1D_READ_ACCESS },
|
||||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,MISS) , "L1D_PREFETCH_MISS.."},
|
{ PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", L1D_READ_ACCESS },
|
||||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) , "L1D_PREFETCH_ACCESS"},
|
{ PERF_TYPE_RAW, RawConfig(0x02,0x04), "L2_HIT_LOADS.......", L1D_READ_ACCESS },
|
||||||
{ PERF_TYPE_HW_CACHE, CacheControl(LL,READ,MISS) , "LL_READ_MISS......."},
|
{ PERF_TYPE_RAW, RawConfig(0x04,0x04), "L2_MISS_LOADS......", L1D_READ_ACCESS },
|
||||||
// { PERF_TYPE_HW_CACHE, CacheControl(LL,READ,ACCESS) , "LL_READ_ACCESS....."},
|
{ PERF_TYPE_RAW, RawConfig(0x10,0x04), "UTLB_MISS_LOADS....", L1D_READ_ACCESS },
|
||||||
{ PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,MISS) , "LL_WRITE_MISS......"},
|
{ PERF_TYPE_RAW, RawConfig(0x08,0x04), "DTLB_MISS_LOADS....", L1D_READ_ACCESS },
|
||||||
{ PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,ACCESS) , "LL_WRITE_ACCESS...."},
|
// 11
|
||||||
{ PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,MISS) , "LL_PREFETCH_MISS..."},
|
#else
|
||||||
{ PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,ACCESS) , "LL_PREFETCH_ACCESS."},
|
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,ACCESS) , "L1D_READ_ACCESS....",INSTRUCTIONS},
|
||||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,MISS) , "L1I_READ_MISS......"},
|
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,MISS) , "L1D_READ_MISS......",L1D_READ_ACCESS},
|
||||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,ACCESS) , "L1I_READ_ACCESS...."}
|
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,MISS) , "L1D_WRITE_MISS.....",L1D_READ_ACCESS},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,ACCESS) , "L1D_WRITE_ACCESS...",L1D_READ_ACCESS},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,MISS) , "L1D_PREFETCH_MISS..",L1D_READ_ACCESS},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) , "L1D_PREFETCH_ACCESS",L1D_READ_ACCESS},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) , "L1D_PREFETCH_ACCESS",L1D_READ_ACCESS},
|
||||||
|
// 11
|
||||||
#endif
|
#endif
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(LL,READ,MISS) , "LL_READ_MISS.......",L1D_READ_ACCESS},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(LL,READ,ACCESS) , "LL_READ_ACCESS.....",L1D_READ_ACCESS},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,MISS) , "LL_WRITE_MISS......",L1D_READ_ACCESS},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,ACCESS) , "LL_WRITE_ACCESS....",L1D_READ_ACCESS},
|
||||||
|
//15
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,MISS) , "LL_PREFETCH_MISS...",L1D_READ_ACCESS},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,ACCESS) , "LL_PREFETCH_ACCESS.",L1D_READ_ACCESS},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,MISS) , "L1I_READ_MISS......",INSTRUCTIONS},
|
||||||
|
{ PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,ACCESS) , "L1I_READ_ACCESS....",INSTRUCTIONS}
|
||||||
|
//19
|
||||||
// { PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, "STALL_CYCLES" },
|
// { PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, "STALL_CYCLES" },
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
145
lib/PerfCount.h
145
lib/PerfCount.h
@ -1,3 +1,32 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/PerfCount.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <peterboyle@MacBook-Pro.local>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_PERFCOUNT_H
|
#ifndef GRID_PERFCOUNT_H
|
||||||
#define GRID_PERFCOUNT_H
|
#define GRID_PERFCOUNT_H
|
||||||
|
|
||||||
@ -5,7 +34,7 @@
|
|||||||
#include <ctime>
|
#include <ctime>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
@ -14,8 +43,11 @@
|
|||||||
#else
|
#else
|
||||||
#include <sys/syscall.h>
|
#include <sys/syscall.h>
|
||||||
#endif
|
#endif
|
||||||
namespace Grid {
|
#ifdef __x86_64__
|
||||||
|
#include <x86intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
|
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
|
||||||
@ -29,6 +61,48 @@ static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Two build modes are selected here:
//  - TIMERS_OFF defined: cyclecount() is a stub returning 0 and the __SSC_*
//    marker macros expand to inline assembly.
//  - otherwise: the __SSC_* macros expand to nothing and cyclecount() reads an
//    architecture specific counter (0 when no counter is known).
#ifdef TIMERS_OFF
|
||||||
|
|
||||||
|
|
||||||
|
// Timing disabled: always report zero cycles.
inline uint64_t cyclecount(void){
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
// Loads the immediate `mark` into %ebx and then emits the raw bytes
// 0x64 0x67 0x90.
// NOTE(review): presumably a marker sequence recognised by an external
// tracing/emulation tool - confirm which one.
#define __SSC_MARK(mark) __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(mark):"%ebx")
|
||||||
|
// Marker value 0x110.
#define __SSC_STOP __SSC_MARK(0x110)
|
||||||
|
// Marker value 0x111.
#define __SSC_START __SSC_MARK(0x111)
|
||||||
|
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// Markers compile away to nothing when timers are enabled.
#define __SSC_MARK(mark)
|
||||||
|
#define __SSC_STOP
|
||||||
|
#define __SSC_START
|
||||||
|
|
||||||
|
/*
|
||||||
|
* cycle counters arch dependent
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __bgq__
|
||||||
|
// Reads special purpose register 0x10C with mfspr.
// NOTE(review): presumably the time base register on this target - confirm.
inline uint64_t cyclecount(void){
|
||||||
|
uint64_t tmp;
|
||||||
|
asm volatile ("mfspr %0,0x10C" : "=&r" (tmp) );
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
#elif defined __x86_64__
|
||||||
|
// Reads the counter through the __rdtsc() intrinsic; the __rdtscp variant is
// left commented out below.
inline uint64_t cyclecount(void){
|
||||||
|
return __rdtsc();
|
||||||
|
// unsigned int dummy;
|
||||||
|
// return __rdtscp(&dummy);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
|
||||||
|
// No counter implemented for this architecture: always report zero.
inline uint64_t cyclecount(void){
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
class PerformanceCounter {
|
class PerformanceCounter {
|
||||||
private:
|
private:
|
||||||
@ -38,6 +112,7 @@ private:
|
|||||||
uint32_t type;
|
uint32_t type;
|
||||||
uint64_t config;
|
uint64_t config;
|
||||||
const char *name;
|
const char *name;
|
||||||
|
int normalisation;
|
||||||
} PerformanceCounterConfig;
|
} PerformanceCounterConfig;
|
||||||
|
|
||||||
static const PerformanceCounterConfig PerformanceCounterConfigs [];
|
static const PerformanceCounterConfig PerformanceCounterConfigs [];
|
||||||
@ -45,26 +120,12 @@ private:
|
|||||||
public:
|
public:
|
||||||
|
|
||||||
enum PerformanceCounterType {
|
enum PerformanceCounterType {
|
||||||
CPUCYCLES=0,
|
CACHE_REFERENCES=0,
|
||||||
INSTRUCTIONS,
|
CACHE_MISSES=1,
|
||||||
// STALL_CYCLES,
|
CPUCYCLES=2,
|
||||||
CACHE_REFERENCES,
|
INSTRUCTIONS=3,
|
||||||
CACHE_MISSES,
|
L1D_READ_ACCESS=4,
|
||||||
L1D_READ_MISS,
|
PERFORMANCE_COUNTER_NUM_TYPES=19
|
||||||
L1D_READ_ACCESS,
|
|
||||||
L1D_WRITE_MISS,
|
|
||||||
L1D_WRITE_ACCESS,
|
|
||||||
L1D_PREFETCH_MISS,
|
|
||||||
L1D_PREFETCH_ACCESS,
|
|
||||||
LL_READ_MISS,
|
|
||||||
// LL_READ_ACCESS,
|
|
||||||
LL_WRITE_MISS,
|
|
||||||
LL_WRITE_ACCESS,
|
|
||||||
LL_PREFETCH_MISS,
|
|
||||||
LL_PREFETCH_ACCESS,
|
|
||||||
L1I_READ_MISS,
|
|
||||||
L1I_READ_ACCESS,
|
|
||||||
PERFORMANCE_COUNTER_NUM_TYPES
|
|
||||||
};
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -72,8 +133,10 @@ public:
|
|||||||
int PCT;
|
int PCT;
|
||||||
|
|
||||||
long long count;
|
long long count;
|
||||||
|
long long cycles;
|
||||||
int fd;
|
int fd;
|
||||||
uint64_t elapsed;
|
int cyclefd;
|
||||||
|
unsigned long long elapsed;
|
||||||
uint64_t begin;
|
uint64_t begin;
|
||||||
|
|
||||||
static int NumTypes(void){
|
static int NumTypes(void){
|
||||||
@ -85,7 +148,9 @@ public:
|
|||||||
assert(_pct>=0);
|
assert(_pct>=0);
|
||||||
assert(_pct<PERFORMANCE_COUNTER_NUM_TYPES);
|
assert(_pct<PERFORMANCE_COUNTER_NUM_TYPES);
|
||||||
fd=-1;
|
fd=-1;
|
||||||
|
cyclefd=-1;
|
||||||
count=0;
|
count=0;
|
||||||
|
cycles=0;
|
||||||
PCT =_pct;
|
PCT =_pct;
|
||||||
Open();
|
Open();
|
||||||
#endif
|
#endif
|
||||||
@ -110,6 +175,15 @@ public:
|
|||||||
fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name);
|
fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name);
|
||||||
perror("Error is");
|
perror("Error is");
|
||||||
}
|
}
|
||||||
|
int norm = PerformanceCounterConfigs[PCT].normalisation;
|
||||||
|
pe.type = PerformanceCounterConfigs[norm].type;
|
||||||
|
pe.config= PerformanceCounterConfigs[norm].config;
|
||||||
|
name = PerformanceCounterConfigs[norm].name;
|
||||||
|
cyclefd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
|
||||||
|
if (cyclefd == -1) {
|
||||||
|
fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name);
|
||||||
|
perror("Error is");
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -117,10 +191,12 @@ public:
|
|||||||
{
|
{
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
if ( fd!= -1) {
|
if ( fd!= -1) {
|
||||||
ioctl(fd, PERF_EVENT_IOC_RESET, 0);
|
::ioctl(fd, PERF_EVENT_IOC_RESET, 0);
|
||||||
ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
|
::ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
|
||||||
|
::ioctl(cyclefd, PERF_EVENT_IOC_RESET, 0);
|
||||||
|
::ioctl(cyclefd, PERF_EVENT_IOC_ENABLE, 0);
|
||||||
}
|
}
|
||||||
begin =__rdtsc();
|
begin =cyclecount();
|
||||||
#else
|
#else
|
||||||
begin = 0;
|
begin = 0;
|
||||||
#endif
|
#endif
|
||||||
@ -128,12 +204,15 @@ public:
|
|||||||
|
|
||||||
void Stop(void) {
|
void Stop(void) {
|
||||||
count=0;
|
count=0;
|
||||||
|
cycles=0;
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
if ( fd!= -1) {
|
if ( fd!= -1) {
|
||||||
ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
|
::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
|
||||||
|
::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0);
|
||||||
::read(fd, &count, sizeof(long long));
|
::read(fd, &count, sizeof(long long));
|
||||||
|
::read(cyclefd, &cycles, sizeof(long long));
|
||||||
}
|
}
|
||||||
elapsed = __rdtsc() - begin;
|
elapsed = cyclecount() - begin;
|
||||||
#else
|
#else
|
||||||
elapsed = 0;
|
elapsed = 0;
|
||||||
#endif
|
#endif
|
||||||
@ -141,16 +220,20 @@ public:
|
|||||||
}
|
}
|
||||||
void Report(void) {
|
void Report(void) {
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
printf("%llu cycles %s = %20llu\n", elapsed , PerformanceCounterConfigs[PCT].name, count);
|
int N = PerformanceCounterConfigs[PCT].normalisation;
|
||||||
|
const char * sn = PerformanceCounterConfigs[N].name ;
|
||||||
|
const char * sc = PerformanceCounterConfigs[PCT].name;
|
||||||
|
std::printf("tsc = %llu %s = %llu %s = %20llu\n (%s/%s) rate = %lf\n", elapsed,sn ,cycles,
|
||||||
|
sc, count, sc,sn, (double)count/(double)cycles);
|
||||||
#else
|
#else
|
||||||
printf("%llu cycles \n", elapsed );
|
std::printf("%llu cycles \n", elapsed );
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Release the perf-event descriptors held by this counter.
// A descriptor equal to -1 marks an event that could not be opened, so it is
// skipped rather than passed to close().
~PerformanceCounter()
{
#ifdef __linux__
  if ( fd      != -1 ) ::close(fd);
  if ( cyclefd != -1 ) ::close(cyclefd);
#endif
}
|
||||||
|
|
||||||
|
57
lib/Simd.h
57
lib/Simd.h
@ -1,3 +1,33 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/Simd.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: neo <cossu@post.kek.jp>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_SIMD_H
|
#ifndef GRID_SIMD_H
|
||||||
#define GRID_SIMD_H
|
#define GRID_SIMD_H
|
||||||
|
|
||||||
@ -13,10 +43,13 @@
|
|||||||
|
|
||||||
|
|
||||||
// Pack four 2-bit selectors into one integer, A in the highest position.
#define _MM_SELECT_FOUR_FOUR(A,B,C,D) ((A<<6)|(B<<4)|(C<<2)|(D))
// The same expression spelled out as a string literal from the argument text.
#define _MM_SELECT_FOUR_FOUR_STRING(A,B,C,D) "((" #A "<<6)|(" #B "<<4)|(" #C "<<2)|(" #D "))"
// Pack eight 1-bit selectors into one integer: A lands in bit 7 ... H in bit 0.
// (G previously used a shift of 4, colliding with D.)
#define _MM_SELECT_EIGHT_TWO(A,B,C,D,E,F,G,H) ((A<<7)|(B<<6)|(C<<5)|(D<<4)|(E<<3)|(F<<2)|(G<<1)|(H))
// Narrower forms fill the upper selectors with zero.  There must be no space
// between the macro name and '(' or the macro becomes object-like.
#define _MM_SELECT_FOUR_TWO(A,B,C,D) _MM_SELECT_EIGHT_TWO(0,0,0,0,A,B,C,D)
#define _MM_SELECT_TWO_TWO(A,B) _MM_SELECT_FOUR_TWO(0,0,A,B)

#define RotateBit (0x100)
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
typedef uint32_t Integer;
|
typedef uint32_t Integer;
|
||||||
@ -86,6 +119,14 @@ namespace Grid {
|
|||||||
inline ComplexD timesI(const ComplexD &r) { return(r*ComplexD(0.0,1.0));}
|
inline ComplexD timesI(const ComplexD &r) { return(r*ComplexD(0.0,1.0));}
|
||||||
inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));}
|
inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));}
|
||||||
inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));}
|
inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));}
|
||||||
|
|
||||||
|
// define projections to real and imaginay parts
|
||||||
|
inline ComplexF projReal(const ComplexF &r){return( ComplexF(std::real(r), 0.0));}
|
||||||
|
inline ComplexD projReal(const ComplexD &r){return( ComplexD(std::real(r), 0.0));}
|
||||||
|
inline ComplexF projImag(const ComplexF &r){return (ComplexF(std::imag(r), 0.0 ));}
|
||||||
|
inline ComplexD projImag(const ComplexD &r){return (ComplexD(std::imag(r), 0.0));}
|
||||||
|
|
||||||
|
// define auxiliary functions for complex computations
|
||||||
inline void timesI(ComplexF &ret,const ComplexF &r) { ret = timesI(r);}
|
inline void timesI(ComplexF &ret,const ComplexF &r) { ret = timesI(r);}
|
||||||
inline void timesI(ComplexD &ret,const ComplexD &r) { ret = timesI(r);}
|
inline void timesI(ComplexD &ret,const ComplexD &r) { ret = timesI(r);}
|
||||||
inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);}
|
inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);}
|
||||||
@ -131,8 +172,8 @@ namespace Grid {
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#include <simd/Grid_vector_types.h>
|
#include "simd/Grid_vector_types.h"
|
||||||
#include <simd/Grid_vector_unops.h>
|
#include "simd/Grid_vector_unops.h"
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
// Default precision
|
// Default precision
|
||||||
@ -196,6 +237,18 @@ namespace Grid {
|
|||||||
stream<<">";
|
stream<<">";
|
||||||
return stream;
|
return stream;
|
||||||
}
|
}
|
||||||
|
inline std::ostream& operator<< (std::ostream& stream, const vInteger &o){
|
||||||
|
int nn=vInteger::Nsimd();
|
||||||
|
std::vector<Integer,alignedAllocator<Integer> > buf(nn);
|
||||||
|
vstore(o,&buf[0]);
|
||||||
|
stream<<"<";
|
||||||
|
for(int i=0;i<nn;i++){
|
||||||
|
stream<<buf[i];
|
||||||
|
if(i<nn-1) stream<<",";
|
||||||
|
}
|
||||||
|
stream<<">";
|
||||||
|
return stream;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
247
lib/Stat.cc
Normal file
247
lib/Stat.cc
Normal file
@ -0,0 +1,247 @@
|
|||||||
|
#include <Grid.h>
|
||||||
|
#include <PerfCount.h>
|
||||||
|
#include <Stat.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
|
||||||
|
bool PmuStat::pmu_initialized=false;
|
||||||
|
|
||||||
|
|
||||||
|
void PmuStat::init(const char *regname)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
name = regname;
|
||||||
|
if (!pmu_initialized)
|
||||||
|
{
|
||||||
|
std::cout<<"initialising pmu"<<std::endl;
|
||||||
|
pmu_initialized = true;
|
||||||
|
pmu_init();
|
||||||
|
}
|
||||||
|
clear();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void PmuStat::clear(void)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
count = 0;
|
||||||
|
tregion = 0;
|
||||||
|
pmc0 = 0;
|
||||||
|
pmc1 = 0;
|
||||||
|
inst = 0;
|
||||||
|
cyc = 0;
|
||||||
|
ref = 0;
|
||||||
|
tcycles = 0;
|
||||||
|
reads = 0;
|
||||||
|
writes = 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void PmuStat::print(void)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
std::cout <<"Reg "<<std::string(name)<<":\n";
|
||||||
|
std::cout <<" region "<<tregion<<std::endl;
|
||||||
|
std::cout <<" cycles "<<tcycles<<std::endl;
|
||||||
|
std::cout <<" inst "<<inst <<std::endl;
|
||||||
|
std::cout <<" cyc "<<cyc <<std::endl;
|
||||||
|
std::cout <<" ref "<<ref <<std::endl;
|
||||||
|
std::cout <<" pmc0 "<<pmc0 <<std::endl;
|
||||||
|
std::cout <<" pmc1 "<<pmc1 <<std::endl;
|
||||||
|
std::cout <<" count "<<count <<std::endl;
|
||||||
|
std::cout <<" reads "<<reads <<std::endl;
|
||||||
|
std::cout <<" writes "<<writes <<std::endl;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void PmuStat::start(void)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
pmu_start();
|
||||||
|
++count;
|
||||||
|
xmemctrs(&mrstart, &mwstart);
|
||||||
|
tstart = __rdtsc();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void PmuStat::enter(int t)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
counters[0][t] = __rdpmc(0);
|
||||||
|
counters[1][t] = __rdpmc(1);
|
||||||
|
counters[2][t] = __rdpmc((1<<30)|0);
|
||||||
|
counters[3][t] = __rdpmc((1<<30)|1);
|
||||||
|
counters[4][t] = __rdpmc((1<<30)|2);
|
||||||
|
counters[5][t] = __rdtsc();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void PmuStat::exit(int t)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
counters[0][t] = __rdpmc(0) - counters[0][t];
|
||||||
|
counters[1][t] = __rdpmc(1) - counters[1][t];
|
||||||
|
counters[2][t] = __rdpmc((1<<30)|0) - counters[2][t];
|
||||||
|
counters[3][t] = __rdpmc((1<<30)|1) - counters[3][t];
|
||||||
|
counters[4][t] = __rdpmc((1<<30)|2) - counters[4][t];
|
||||||
|
counters[5][t] = __rdtsc() - counters[5][t];
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void PmuStat::accum(int nthreads)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
tend = __rdtsc();
|
||||||
|
xmemctrs(&mrend, &mwend);
|
||||||
|
pmu_stop();
|
||||||
|
for (int t = 0; t < nthreads; ++t) {
|
||||||
|
pmc0 += counters[0][t];
|
||||||
|
pmc1 += counters[1][t];
|
||||||
|
inst += counters[2][t];
|
||||||
|
cyc += counters[3][t];
|
||||||
|
ref += counters[4][t];
|
||||||
|
tcycles += counters[5][t];
|
||||||
|
}
|
||||||
|
uint64_t region = tend - tstart;
|
||||||
|
tregion += region;
|
||||||
|
uint64_t mreads = mrend - mrstart;
|
||||||
|
reads += mreads;
|
||||||
|
uint64_t mwrites = mwend - mwstart;
|
||||||
|
writes += mwrites;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Teardown and per-region start/stop hooks.  All three are currently empty.
void PmuStat::pmu_fini(void)
{
}
void PmuStat::pmu_start(void)
{
}
void PmuStat::pmu_stop(void)
{
}
|
||||||
|
// One-time setup hook.  Only builds with _KNIGHTS_LANDING_ defined do any
// work here: they open the KNL memory counters through KNLsetup().
void PmuStat::pmu_init(void)
{
#ifdef _KNIGHTS_LANDING_
  KNLsetup();
#endif
}
|
||||||
|
// Report the current memory read / write counts through *mr and *mw.
// With _KNIGHTS_LANDING_ defined all counters are read with KNLreadctrs()
// and the edcrd[] / edcwr[] entries are summed; the other counter arrays
// are read but not used here.  Without it both outputs are set to zero.
void PmuStat::xmemctrs(uint64_t *mr, uint64_t *mw)
{
#ifdef _KNIGHTS_LANDING_
  ctrs snapshot;
  KNLreadctrs(snapshot);

  uint64_t total_rd = 0;
  uint64_t total_wr = 0;
  for (int edc = 0; edc < NEDC; ++edc) {
    total_rd += snapshot.edcrd[edc];
    total_wr += snapshot.edcwr[edc];
  }
  *mr = total_rd;
  *mw = total_wr;
#else
  *mw = 0;
  *mr = *mw;
#endif
}
|
||||||
|
|
||||||
|
#ifdef _KNIGHTS_LANDING_
|
||||||
|
|
||||||
|
struct knl_gbl_ PmuStat::gbl;
|
||||||
|
|
||||||
|
#define PMU_MEM
|
||||||
|
|
||||||
|
void PmuStat::KNLevsetup(const char *ename, int &fd, int event, int umask)
|
||||||
|
{
|
||||||
|
char fname[1024];
|
||||||
|
snprintf(fname, sizeof(fname), "%s/type", ename);
|
||||||
|
FILE *fp = fopen(fname, "r");
|
||||||
|
if (fp == 0) {
|
||||||
|
::printf("open %s", fname);
|
||||||
|
::exit(0);
|
||||||
|
}
|
||||||
|
int type;
|
||||||
|
int ret = fscanf(fp, "%d", &type);
|
||||||
|
assert(ret == 1);
|
||||||
|
fclose(fp);
|
||||||
|
// std::cout << "Using PMU type "<<type<<" from " << std::string(ename) <<std::endl;
|
||||||
|
|
||||||
|
struct perf_event_attr hw = {};
|
||||||
|
hw.size = sizeof(hw);
|
||||||
|
hw.type = type;
|
||||||
|
// see /sys/devices/uncore_*/format/*
|
||||||
|
// All of the events we are interested in are configured the same way, but
|
||||||
|
// that isn't always true. Proper code would parse the format files
|
||||||
|
hw.config = event | (umask << 8);
|
||||||
|
//hw.read_format = PERF_FORMAT_GROUP;
|
||||||
|
// unfortunately the above only works within a single PMU; might
|
||||||
|
// as well just read them one at a time
|
||||||
|
int cpu = 0;
|
||||||
|
fd = perf_event_open(&hw, -1, cpu, -1, 0);
|
||||||
|
if (fd == -1) {
|
||||||
|
::printf("CPU %d, box %s, event 0x%lx", cpu, ename, hw.config);
|
||||||
|
::exit(0);
|
||||||
|
} else {
|
||||||
|
// std::cout << "event "<<std::string(ename)<<" set up for fd "<<fd<<" hw.config "<<hw.config <<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void PmuStat::KNLsetup(void){
|
||||||
|
|
||||||
|
int ret;
|
||||||
|
char fname[1024];
|
||||||
|
|
||||||
|
// MC RPQ inserts and WPQ inserts (reads & writes)
|
||||||
|
for (int mc = 0; mc < NMC; ++mc)
|
||||||
|
{
|
||||||
|
::snprintf(fname, sizeof(fname), "/sys/devices/uncore_imc_%d",mc);
|
||||||
|
// RPQ Inserts
|
||||||
|
KNLevsetup(fname, gbl.mc_rd[mc], 0x1, 0x1);
|
||||||
|
// WPQ Inserts
|
||||||
|
KNLevsetup(fname, gbl.mc_wr[mc], 0x2, 0x1);
|
||||||
|
}
|
||||||
|
// EDC RPQ inserts and WPQ inserts
|
||||||
|
for (int edc=0; edc < NEDC; ++edc)
|
||||||
|
{
|
||||||
|
::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_eclk_%d",edc);
|
||||||
|
// RPQ inserts
|
||||||
|
KNLevsetup(fname, gbl.edc_rd[edc], 0x1, 0x1);
|
||||||
|
// WPQ inserts
|
||||||
|
KNLevsetup(fname, gbl.edc_wr[edc], 0x2, 0x1);
|
||||||
|
}
|
||||||
|
// EDC HitE, HitM, MissE, MissM
|
||||||
|
for (int edc=0; edc < NEDC; ++edc)
|
||||||
|
{
|
||||||
|
::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_uclk_%d", edc);
|
||||||
|
KNLevsetup(fname, gbl.edc_hite[edc], 0x2, 0x1);
|
||||||
|
KNLevsetup(fname, gbl.edc_hitm[edc], 0x2, 0x2);
|
||||||
|
KNLevsetup(fname, gbl.edc_misse[edc], 0x2, 0x4);
|
||||||
|
KNLevsetup(fname, gbl.edc_missm[edc], 0x2, 0x8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read one 64-bit counter value from the perf descriptor `fd`.
// A failed or short read is reported on stderr and terminates the process
// with EXIT_FAILURE.
uint64_t PmuStat::KNLreadctr(int fd)
{
  uint64_t data = 0;
  // read() returns a signed count; keeping it signed lets -1 be told apart
  // from a short read.
  const ssize_t got = ::read(fd, &data, sizeof(data));
  if (got != (ssize_t)sizeof(data)) {
    if (got < 0) {
      ::perror("read counter");
    } else {
      ::fprintf(stderr, "read counter: short read of %ld bytes\n", (long)got);
    }
    ::exit(EXIT_FAILURE);
  }
  return data;
}
|
||||||
|
|
||||||
|
// Fill `c` with the current value of every counter opened by KNLsetup().
// Descriptors are read in the same grouping in which they were opened:
// memory-controller pairs, then EDC read/write pairs, then the four EDC
// hit/miss events.
void PmuStat::KNLreadctrs(ctrs &c)
{
  for (int mc = 0; mc < NMC; ++mc) {
    c.mcrd[mc] = KNLreadctr(gbl.mc_rd[mc]);
    c.mcwr[mc] = KNLreadctr(gbl.mc_wr[mc]);
  }

  for (int edc = 0; edc < NEDC; ++edc) {
    c.edcrd[edc] = KNLreadctr(gbl.edc_rd[edc]);
    c.edcwr[edc] = KNLreadctr(gbl.edc_wr[edc]);
  }

  for (int edc = 0; edc < NEDC; ++edc) {
    c.edchite[edc]  = KNLreadctr(gbl.edc_hite[edc]);
    c.edchitm[edc]  = KNLreadctr(gbl.edc_hitm[edc]);
    c.edcmisse[edc] = KNLreadctr(gbl.edc_misse[edc]);
    c.edcmissm[edc] = KNLreadctr(gbl.edc_missm[edc]);
  }
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
}
|
104
lib/Stat.h
Normal file
104
lib/Stat.h
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
#ifndef _GRID_STAT_H
|
||||||
|
#define _GRID_STAT_H
|
||||||
|
|
||||||
|
#ifdef AVX512
|
||||||
|
#define _KNIGHTS_LANDING_ROOTONLY
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Extra KNL counters from MCDRAM
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
#ifdef _KNIGHTS_LANDING_
|
||||||
|
#define NMC 6    /* number of uncore_imc_<n> boxes opened by KNLsetup  */
#define NEDC 8   /* number of uncore_edc_*_<n> boxes opened by KNLsetup */

/* One snapshot of every counter value, as filled in by KNLreadctrs(). */
struct ctrs
{
  uint64_t mcrd[NMC];        /* memory controller, event 0x1 (RPQ inserts) */
  uint64_t mcwr[NMC];        /* memory controller, event 0x2 (WPQ inserts) */
  uint64_t edcrd[NEDC];      /* EDC eclk, event 0x1 (RPQ inserts)          */
  uint64_t edcwr[NEDC];      /* EDC eclk, event 0x2 (WPQ inserts)          */
  uint64_t edchite[NEDC];    /* EDC uclk, event 0x2 umask 0x1              */
  uint64_t edchitm[NEDC];    /* EDC uclk, event 0x2 umask 0x2              */
  uint64_t edcmisse[NEDC];   /* EDC uclk, event 0x2 umask 0x4              */
  uint64_t edcmissm[NEDC];   /* EDC uclk, event 0x2 umask 0x8              */
};

/* Peter/Azusa:
 * Our modification of code provided by Larry Meadows from Intel.
 * Verified by (non-NDA) email exchange as OK to publish on github; it
 * should be, since it only uses the /sys/devices/ filesystem, which is
 * already public and in the Linux kernel for KNL.
 *
 * Perf-event descriptors, one per counter, laid out to mirror struct ctrs. */
struct knl_gbl_
{
  int mc_rd[NMC];
  int mc_wr[NMC];
  int edc_rd[NEDC];
  int edc_wr[NEDC];
  int edc_hite[NEDC];
  int edc_hitm[NEDC];
  int edc_misse[NEDC];
  int edc_missm[NEDC];
};
|
||||||
|
#endif
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Accumulates counter statistics for one labelled code region.
// Calling sequence implemented in Stat.cc: init() once, then per region
// start(), enter(t)/exit(t) for each thread slot t, and accum(nthreads).
class PmuStat
{
  // Per-thread scratch values, indexed [row][thread slot].
  // Rows as written by enter()/exit() and summed by accum():
  //   0 pmc0, 1 pmc1, 2 inst, 3 cyc, 4 ref, 5 tsc
  // Rows 6 and 7 are not referenced by those functions.
  uint64_t counters[8][256];
#ifdef _KNIGHTS_LANDING_
  static struct knl_gbl_ gbl;     // perf descriptors shared by all instances
#endif
  const char *name;               // region label supplied to init()

  uint64_t reads;                 // memory reads
  uint64_t writes;                // memory writes
  uint64_t mrstart;               // memory read counter at start of parallel region
  uint64_t mrend;                 // memory read counter at end of parallel region
  uint64_t mwstart;               // memory write counter at start of parallel region
  uint64_t mwend;                 // memory write counter at end of parallel region

  // cumulative counters
  uint64_t count;                 // number of invocations
  uint64_t tregion;               // total time in parallel region (from thread 0)
  uint64_t tcycles;               // total cycles inside parallel region
  uint64_t inst;                  // fixed counters
  uint64_t ref;
  uint64_t cyc;
  uint64_t pmc0;                  // pmu
  uint64_t pmc1;
  // add memory counters here

  // temp variables
  uint64_t tstart;                // tsc at start of parallel region
  uint64_t tend;                  // tsc at end of parallel region

  static bool pmu_initialized;    // set by the first init() call
public:
  static bool is_init(void) { return pmu_initialized; }
  static void pmu_init(void);
  static void pmu_fini(void);
  static void pmu_start(void);
  static void pmu_stop(void);
  void accum(int nthreads);
  static void xmemctrs(uint64_t *mr, uint64_t *mw);
  void start(void);
  void enter(int t);
  void exit(int t);
  void print(void);
  void init(const char *regname);
  void clear(void);
#ifdef _KNIGHTS_LANDING_
  static void     KNLsetup(void);
  static uint64_t KNLreadctr(int fd);
  static void     KNLreadctrs(ctrs &c);
  static void     KNLevsetup(const char *ename, int &fd, int event, int umask);
#endif

};
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
1421
lib/Stencil.h
1421
lib/Stencil.h
File diff suppressed because it is too large
Load Diff
@ -1,22 +1,51 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/Tensors.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: neo <cossu@post.kek.jp>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_MATH_H
|
#ifndef GRID_MATH_H
|
||||||
#define GRID_MATH_H
|
#define GRID_MATH_H
|
||||||
|
|
||||||
#include <tensors/Tensor_traits.h>
|
#include <Grid/tensors/Tensor_traits.h>
|
||||||
#include <tensors/Tensor_class.h>
|
#include <Grid/tensors/Tensor_class.h>
|
||||||
#include <tensors/Tensor_arith.h>
|
#include <Grid/tensors/Tensor_arith.h>
|
||||||
#include <tensors/Tensor_inner.h>
|
#include <Grid/tensors/Tensor_inner.h>
|
||||||
#include <tensors/Tensor_outer.h>
|
#include <Grid/tensors/Tensor_outer.h>
|
||||||
#include <tensors/Tensor_transpose.h>
|
#include <Grid/tensors/Tensor_transpose.h>
|
||||||
#include <tensors/Tensor_trace.h>
|
#include <Grid/tensors/Tensor_trace.h>
|
||||||
#include <tensors/Tensor_index.h>
|
#include <Grid/tensors/Tensor_index.h>
|
||||||
#include <tensors/Tensor_Ta.h>
|
#include <Grid/tensors/Tensor_Ta.h>
|
||||||
#include <tensors/Tensor_determinant.h>
|
#include <Grid/tensors/Tensor_determinant.h>
|
||||||
#include <tensors/Tensor_exp.h>
|
#include <Grid/tensors/Tensor_exp.h>
|
||||||
//#include <tensors/Tensor_peek.h>
|
//#include <Grid/tensors/Tensor_peek.h>
|
||||||
//#include <tensors/Tensor_poke.h>
|
//#include <Grid/tensors/Tensor_poke.h>
|
||||||
#include <tensors/Tensor_reality.h>
|
#include <Grid/tensors/Tensor_reality.h>
|
||||||
#include <tensors/Tensor_unary.h>
|
#include <Grid/tensors/Tensor_unary.h>
|
||||||
#include <tensors/Tensor_extract_merge.h>
|
#include <Grid/tensors/Tensor_extract_merge.h>
|
||||||
#include <tensors/Tensor_logical.h>
|
#include <Grid/tensors/Tensor_logical.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,3 +1,31 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/Threads.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_THREADS_H
|
#ifndef GRID_THREADS_H
|
||||||
#define GRID_THREADS_H
|
#define GRID_THREADS_H
|
||||||
|
|
||||||
@ -9,11 +37,20 @@
|
|||||||
|
|
||||||
#ifdef GRID_OMP
|
#ifdef GRID_OMP
|
||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for ")
|
#ifdef GRID_NUMA
|
||||||
|
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
|
||||||
|
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
|
||||||
|
#else
|
||||||
|
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)")
|
||||||
|
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)")
|
||||||
|
#endif
|
||||||
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
|
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
|
||||||
|
#define PARALLEL_REGION _Pragma("omp parallel")
|
||||||
#else
|
#else
|
||||||
#define PARALLEL_FOR_LOOP
|
#define PARALLEL_FOR_LOOP
|
||||||
|
#define PARALLEL_FOR_LOOP_INTERN
|
||||||
#define PARALLEL_NESTED_LOOP2
|
#define PARALLEL_NESTED_LOOP2
|
||||||
|
#define PARALLEL_REGION
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
@ -95,6 +132,22 @@ class GridThread {
|
|||||||
ThreadBarrier();
|
ThreadBarrier();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Copy `len` bytes from `src` to `dst`.
// With GRID_OMP the copy runs inside an OpenMP parallel region and each
// thread copies the slice handed to it by GridThread::GetWorkBarrier
// (presumably [myoff, myoff+mywork) -- confirm against that helper).
// Without GRID_OMP the bytes are copied serially.
// The source and destination regions must not overlap.
//
// The bytes are copied with explicit loops: an unqualified call to bcopy()
// from inside this function resolves to this function itself, not to the C
// library routine, and would recurse without end.
static void bcopy(const void *src, void *dst, size_t len) {
  const char *c_src =(const char *) src;
  char *c_dest=(char *) dst;
#ifdef GRID_OMP
#pragma omp parallel
  {
    int me,mywork,myoff;
    GridThread::GetWorkBarrier(len,me, mywork,myoff);
    for(int i=0;i<mywork;i++){
      c_dest[myoff+i] = c_src[myoff+i];
    }
  }
#else
  for(size_t i=0;i<len;i++){
    c_dest[i] = c_src[i];
  }
#endif
}
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
60
lib/Timer.h
60
lib/Timer.h
@ -1,3 +1,31 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/Timer.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_TIME_H
|
#ifndef GRID_TIME_H
|
||||||
#define GRID_TIME_H
|
#define GRID_TIME_H
|
||||||
|
|
||||||
@ -11,40 +39,62 @@ namespace Grid {
|
|||||||
// Dress the output; use std::chrono
|
// Dress the output; use std::chrono
|
||||||
|
|
||||||
// C++11 time facilities better?
|
// C++11 time facilities better?
|
||||||
double usecond(void);
|
// Wall-clock time in microseconds, as a double.
// With TIMERS_ON defined the value comes from gettimeofday(); otherwise the
// function returns 0.  `tv` is zero-initialised so that the disabled build
// does not read an indeterminate value.
inline double usecond(void) {
  struct timeval tv = {0, 0};
#ifdef TIMERS_ON
  gettimeofday(&tv,NULL);
#endif
  return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
}
|
||||||
|
|
||||||
typedef std::chrono::system_clock GridClock;
|
typedef std::chrono::system_clock GridClock;
|
||||||
typedef std::chrono::time_point<GridClock> GridTimePoint;
|
typedef std::chrono::time_point<GridClock> GridTimePoint;
|
||||||
typedef std::chrono::milliseconds GridTime;
|
typedef std::chrono::milliseconds GridTime;
|
||||||
|
typedef std::chrono::microseconds GridUsecs;
|
||||||
|
|
||||||
|
// Print a millisecond duration as "<count> ms".
inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milliseconds & time)
{
  return stream << time.count() << " ms";
}
|
||||||
|
|
||||||
class GridStopWatch {
|
class GridStopWatch {
|
||||||
private:
|
private:
|
||||||
bool running;
|
bool running;
|
||||||
GridTimePoint start;
|
GridTimePoint start;
|
||||||
GridTime accumulator;
|
GridUsecs accumulator;
|
||||||
public:
|
public:
|
||||||
GridStopWatch () {
|
GridStopWatch () {
|
||||||
Reset();
|
Reset();
|
||||||
}
|
}
|
||||||
void Start(void) {
|
void Start(void) {
|
||||||
assert(running == false);
|
assert(running == false);
|
||||||
|
#ifdef TIMERS_ON
|
||||||
start = GridClock::now();
|
start = GridClock::now();
|
||||||
|
#endif
|
||||||
running = true;
|
running = true;
|
||||||
}
|
}
|
||||||
void Stop(void) {
|
void Stop(void) {
|
||||||
assert(running == true);
|
assert(running == true);
|
||||||
accumulator+= std::chrono::duration_cast<GridTime>(GridClock::now()-start);
|
#ifdef TIMERS_ON
|
||||||
|
accumulator+= std::chrono::duration_cast<GridUsecs>(GridClock::now()-start);
|
||||||
|
#endif
|
||||||
running = false;
|
running = false;
|
||||||
};
|
};
|
||||||
void Reset(void){
|
void Reset(void){
|
||||||
running = false;
|
running = false;
|
||||||
|
#ifdef TIMERS_ON
|
||||||
start = GridClock::now();
|
start = GridClock::now();
|
||||||
accumulator = std::chrono::duration_cast<GridTime>(start-start);
|
#endif
|
||||||
|
accumulator = std::chrono::duration_cast<GridUsecs>(start-start);
|
||||||
}
|
}
|
||||||
GridTime Elapsed(void) {
|
GridTime Elapsed(void) {
|
||||||
assert(running == false);
|
assert(running == false);
|
||||||
return accumulator;
|
return std::chrono::duration_cast<GridTime>( accumulator );
|
||||||
|
}
|
||||||
|
uint64_t useconds(void){
|
||||||
|
assert(running == false);
|
||||||
|
return (uint64_t) accumulator.count();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1,7 +1,36 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/CoarsenedMatrix.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_ALGORITHM_COARSENED_MATRIX_H
|
#ifndef GRID_ALGORITHM_COARSENED_MATRIX_H
|
||||||
#define GRID_ALGORITHM_COARSENED_MATRIX_H
|
#define GRID_ALGORITHM_COARSENED_MATRIX_H
|
||||||
|
|
||||||
#include <Grid.h>
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
@ -117,6 +146,56 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
Orthogonalise();
|
Orthogonalise();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Build the first `nn` subspace vectors from an implicitly restarted Lanczos
// run on `hermop`, started from Gaussian noise drawn from RNG with a loose
// residual target (1.0e-3).  For each vector kept, its norm, the norm of
// hermop.Op applied to it, and two residual-style diagnostics are logged.
// The subspace is then orthogonalised and the norms are logged once more.
virtual void CreateSubspaceLanczos(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis)
{
  // Run a Lanczos with sloppy convergence
  const int n_wanted  = nn;
  const int n_keep    = nn+20;
  const int n_extra   = nn+20;
  const int n_work    = n_keep+n_extra;    // size of the eval/evec workspaces
  const int max_iters = 10000;
  RealD tolerance = 1.0e-3;

  Chebyshev<FineField> Cheb(0.5,64.0,21);
  ImplicitlyRestartedLanczos<FineField> IRL(hermop,Cheb,n_wanted,n_keep,n_work,tolerance,max_iters);
  //    IRL.lock = 1;

  FineField noise(FineGrid);
  gaussian(RNG,noise);
  FineField tmp(FineGrid);
  std::vector<RealD>     eval(n_work);
  std::vector<FineField> evec(n_work,FineGrid);

  int Nconv;
  IRL.calc(eval,evec,noise,Nconv);

  // pull back nn vectors
  for(int b=0;b<nn;b++){

    subspace[b] = evec[b];
    std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;

    hermop.Op(subspace[b],tmp);
    std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(tmp)<<std::endl;

    // `noise` is reused as scratch space for the two diagnostics below
    noise = tmp - sqrt(eval[b])*subspace[b] ;
    std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<" ; [ M - Lambda ]_"<<b<<" vec_"<<b<<" = " <<norm2(noise)<<std::endl;

    noise = tmp + eval[b]*subspace[b] ;
    std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<" ; [ M - Lambda ]_"<<b<<" vec_"<<b<<" = " <<norm2(noise)<<std::endl;
  }

  Orthogonalise();

  for(int b=0;b<nn;b++){
    std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
  }
}
|
||||||
|
|
||||||
virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
|
virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
|
||||||
|
|
||||||
RealD scale;
|
RealD scale;
|
||||||
@ -170,11 +249,10 @@ namespace Grid {
|
|||||||
////////////////////
|
////////////////////
|
||||||
Geometry geom;
|
Geometry geom;
|
||||||
GridBase * _grid;
|
GridBase * _grid;
|
||||||
CartesianStencil<siteVector,siteVector,SimpleCompressor<siteVector> > Stencil;
|
CartesianStencil<siteVector,siteVector> Stencil;
|
||||||
|
|
||||||
std::vector<CoarseMatrix> A;
|
std::vector<CoarseMatrix> A;
|
||||||
|
|
||||||
std::vector<siteVector,alignedAllocator<siteVector> > comm_buf;
|
|
||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
// Interface
|
// Interface
|
||||||
@ -187,7 +265,7 @@ namespace Grid {
|
|||||||
conformable(in._grid,out._grid);
|
conformable(in._grid,out._grid);
|
||||||
|
|
||||||
SimpleCompressor<siteVector> compressor;
|
SimpleCompressor<siteVector> compressor;
|
||||||
Stencil.HaloExchange(in,comm_buf,compressor);
|
Stencil.HaloExchange(in,compressor);
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<Grid()->oSites();ss++){
|
for(int ss=0;ss<Grid()->oSites();ss++){
|
||||||
@ -204,7 +282,7 @@ PARALLEL_FOR_LOOP
|
|||||||
} else if(SE->_is_local) {
|
} else if(SE->_is_local) {
|
||||||
nbr = in._odata[SE->_offset];
|
nbr = in._odata[SE->_offset];
|
||||||
} else {
|
} else {
|
||||||
nbr = comm_buf[SE->_offset];
|
nbr = Stencil.CommBuf()[SE->_offset];
|
||||||
}
|
}
|
||||||
res = res + A[point]._odata[ss]*nbr;
|
res = res + A[point]._odata[ss]*nbr;
|
||||||
}
|
}
|
||||||
@ -228,7 +306,6 @@ PARALLEL_FOR_LOOP
|
|||||||
Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements),
|
Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements),
|
||||||
A(geom.npoint,&CoarseGrid)
|
A(geom.npoint,&CoarseGrid)
|
||||||
{
|
{
|
||||||
comm_buf.resize(Stencil._unified_buffer_size);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
|
void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
|
||||||
|
@ -1,3 +1,31 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/LinearOperator.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_ALGORITHM_LINEAR_OP_H
|
#ifndef GRID_ALGORITHM_LINEAR_OP_H
|
||||||
#define GRID_ALGORITHM_LINEAR_OP_H
|
#define GRID_ALGORITHM_LINEAR_OP_H
|
||||||
|
|
||||||
@ -194,6 +222,7 @@ namespace Grid {
|
|||||||
SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
|
SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
|
||||||
virtual RealD Mpc (const Field &in, Field &out) {
|
virtual RealD Mpc (const Field &in, Field &out) {
|
||||||
Field tmp(in._grid);
|
Field tmp(in._grid);
|
||||||
|
// std::cout <<"grid pointers: in._grid="<< in._grid << " out._grid=" << out._grid << " _Mat.Grid=" << _Mat.Grid() << " _Mat.RedBlackGrid=" << _Mat.RedBlackGrid() << std::endl;
|
||||||
|
|
||||||
_Mat.Meooe(in,tmp);
|
_Mat.Meooe(in,tmp);
|
||||||
_Mat.MooeeInv(tmp,out);
|
_Mat.MooeeInv(tmp,out);
|
||||||
@ -223,10 +252,10 @@ namespace Grid {
|
|||||||
virtual RealD Mpc (const Field &in, Field &out) {
|
virtual RealD Mpc (const Field &in, Field &out) {
|
||||||
Field tmp(in._grid);
|
Field tmp(in._grid);
|
||||||
|
|
||||||
_Mat.Meooe(in,tmp);
|
_Mat.Meooe(in,out);
|
||||||
_Mat.MooeeInv(tmp,out);
|
_Mat.MooeeInv(out,tmp);
|
||||||
_Mat.Meooe(out,tmp);
|
_Mat.Meooe(tmp,out);
|
||||||
_Mat.MooeeInv(tmp,out);
|
_Mat.MooeeInv(out,tmp);
|
||||||
|
|
||||||
return axpy_norm(out,-1.0,tmp,in);
|
return axpy_norm(out,-1.0,tmp,in);
|
||||||
}
|
}
|
||||||
@ -242,6 +271,35 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<class Matrix,class Field>
|
||||||
|
class SchurDiagTwoOperator : public SchurOperatorBase<Field> {
|
||||||
|
protected:
|
||||||
|
Matrix &_Mat;
|
||||||
|
public:
|
||||||
|
SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){};
|
||||||
|
|
||||||
|
virtual RealD Mpc (const Field &in, Field &out) {
|
||||||
|
Field tmp(in._grid);
|
||||||
|
|
||||||
|
_Mat.MooeeInv(in,out);
|
||||||
|
_Mat.Meooe(out,tmp);
|
||||||
|
_Mat.MooeeInv(tmp,out);
|
||||||
|
_Mat.Meooe(out,tmp);
|
||||||
|
|
||||||
|
return axpy_norm(out,-1.0,tmp,in);
|
||||||
|
}
|
||||||
|
virtual RealD MpcDag (const Field &in, Field &out){
|
||||||
|
Field tmp(in._grid);
|
||||||
|
|
||||||
|
_Mat.MeooeDag(in,out);
|
||||||
|
_Mat.MooeeInvDag(out,tmp);
|
||||||
|
_Mat.MeooeDag(tmp,out);
|
||||||
|
_Mat.MooeeInvDag(out,tmp);
|
||||||
|
|
||||||
|
return axpy_norm(out,-1.0,tmp,in);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
// Base classes for functions of operators
|
// Base classes for functions of operators
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/Preconditioner.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_PRECONDITIONER_H
|
#ifndef GRID_PRECONDITIONER_H
|
||||||
#define GRID_PRECONDITIONER_H
|
#define GRID_PRECONDITIONER_H
|
||||||
|
|
||||||
|
@ -1,7 +1,33 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/SparseMatrix.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_ALGORITHM_SPARSE_MATRIX_H
|
#ifndef GRID_ALGORITHM_SPARSE_MATRIX_H
|
||||||
#define GRID_ALGORITHM_SPARSE_MATRIX_H
|
#define GRID_ALGORITHM_SPARSE_MATRIX_H
|
||||||
|
|
||||||
#include <Grid.h>
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -1,8 +1,35 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/approx/Chebyshev.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_CHEBYSHEV_H
|
#ifndef GRID_CHEBYSHEV_H
|
||||||
#define GRID_CHEBYSHEV_H
|
#define GRID_CHEBYSHEV_H
|
||||||
|
|
||||||
#include<Grid.h>
|
#include <Grid/algorithms/LinearOperator.h>
|
||||||
#include<algorithms/LinearOperator.h>
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
@ -30,13 +57,14 @@ namespace Grid {
|
|||||||
Field Mtmp(in._grid);
|
Field Mtmp(in._grid);
|
||||||
AtoN = in;
|
AtoN = in;
|
||||||
out = AtoN*Coeffs[0];
|
out = AtoN*Coeffs[0];
|
||||||
// std::cout <<"Poly in " <<norm2(in)<<std::endl;
|
// std::cout <<"Poly in " <<norm2(in)<<" size "<< Coeffs.size()<<std::endl;
|
||||||
// std::cout <<"0 " <<norm2(out)<<std::endl;
|
// std::cout <<"Coeffs[0]= "<<Coeffs[0]<< " 0 " <<norm2(out)<<std::endl;
|
||||||
for(int n=1;n<Coeffs.size();n++){
|
for(int n=1;n<Coeffs.size();n++){
|
||||||
Mtmp = AtoN;
|
Mtmp = AtoN;
|
||||||
Linop.HermOp(Mtmp,AtoN);
|
Linop.HermOp(Mtmp,AtoN);
|
||||||
out=out+AtoN*Coeffs[n];
|
out=out+AtoN*Coeffs[n];
|
||||||
// std::cout << n<<" " <<norm2(out)<<std::endl;
|
// std::cout <<"Coeffs "<<n<<"= "<< Coeffs[n]<< " 0 " <<std::endl;
|
||||||
|
// std::cout << n<<" " <<norm2(out)<<std::endl;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
@ -54,7 +82,8 @@ namespace Grid {
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
void csv(std::ostream &out){
|
void csv(std::ostream &out){
|
||||||
for (RealD x=lo; x<hi; x+=(hi-lo)/1000) {
|
RealD diff = hi-lo;
|
||||||
|
for (RealD x=lo-0.2*diff; x<hi+0.2*diff; x+=(hi-lo)/1000) {
|
||||||
RealD f = approx(x);
|
RealD f = approx(x);
|
||||||
out<< x<<" "<<f<<std::endl;
|
out<< x<<" "<<f<<std::endl;
|
||||||
}
|
}
|
||||||
@ -71,10 +100,24 @@ namespace Grid {
|
|||||||
|
|
||||||
Chebyshev(){};
|
Chebyshev(){};
|
||||||
Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);};
|
Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);};
|
||||||
|
// Construct from an interval and an order only; all setup is delegated to
// the three-argument Init(_lo,_hi,_order).
Chebyshev(RealD _lo,RealD _hi,int _order)
{
  Init(_lo,_hi,_order);
};
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// c.f. numerical recipes "chebft"/"chebev". This is sec 5.8 "Chebyshev approximation".
|
// c.f. numerical recipes "chebft"/"chebev". This is sec 5.8 "Chebyshev approximation".
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// CJ: the one we need for Lanczos
|
||||||
|
// Record the interval [_lo,_hi] and the order, then set up 'order'
// coefficients that are all zero except the last one, which is one.
// Exits the process with status -1 when _order is below 2.
void Init(RealD _lo,RealD _hi,int _order)
{
  lo=_lo;
  hi=_hi;
  order=_order;

  if(order < 2) exit(-1);

  // std::vector::assign takes (count, value).  The earlier call
  // assign(0.,order) asked for zero elements, which emptied the vector and
  // made the write to Coeffs[order-1] below an out-of-bounds access.
  Coeffs.assign(order,0.);
  Coeffs[order-1] = 1.;
};
|
||||||
|
|
||||||
void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD))
|
void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD))
|
||||||
{
|
{
|
||||||
lo=_lo;
|
lo=_lo;
|
||||||
@ -154,6 +197,8 @@ namespace Grid {
|
|||||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||||
|
|
||||||
GridBase *grid=in._grid;
|
GridBase *grid=in._grid;
|
||||||
|
//std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl;
|
||||||
|
//<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
|
||||||
|
|
||||||
int vol=grid->gSites();
|
int vol=grid->gSites();
|
||||||
|
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/approx/MultiShiftFunction.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
@ -1,3 +1,31 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/approx/MultiShiftFunction.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef MULTI_SHIFT_FUNCTION
|
#ifndef MULTI_SHIFT_FUNCTION
|
||||||
#define MULTI_SHIFT_FUNCTION
|
#define MULTI_SHIFT_FUNCTION
|
||||||
|
|
||||||
|
@ -16,9 +16,13 @@
|
|||||||
#define INCLUDED_ALG_REMEZ_H
|
#define INCLUDED_ALG_REMEZ_H
|
||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
#include <Config.h>
|
||||||
|
|
||||||
//#include <algorithms/approx/bigfloat.h>
|
#ifdef HAVE_LIBGMP
|
||||||
#include <algorithms/approx/bigfloat_double.h>
|
#include "bigfloat.h"
|
||||||
|
#else
|
||||||
|
#include "bigfloat_double.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
|
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
|
||||||
#define SUM_MAX 10 // Maximum number of terms in exponential
|
#define SUM_MAX 10 // Maximum number of terms in exponential
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/approx/bigfloat_double.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
typedef double mfloat;
|
typedef double mfloat;
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/AdefGeneric.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
|
#ifndef GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
|
||||||
#define GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
|
#define GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
|
||||||
|
|
||||||
|
@ -1,105 +1,168 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/ConjugateGradient.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_CONJUGATE_GRADIENT_H
|
#ifndef GRID_CONJUGATE_GRADIENT_H
|
||||||
#define GRID_CONJUGATE_GRADIENT_H
|
#define GRID_CONJUGATE_GRADIENT_H
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
// Base classes for iterative processes based on operators
|
// Base classes for iterative processes based on operators
|
||||||
// single input vec, single output vec.
|
// single input vec, single output vec.
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
template<class Field>
|
template <class Field>
|
||||||
class ConjugateGradient : public OperatorFunction<Field> {
|
class ConjugateGradient : public OperatorFunction<Field> {
|
||||||
public:
|
public:
|
||||||
RealD Tolerance;
|
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
|
||||||
Integer MaxIterations;
|
// Defaults true.
|
||||||
ConjugateGradient(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) {
|
RealD Tolerance;
|
||||||
};
|
Integer MaxIterations;
|
||||||
|
ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
|
||||||
|
: Tolerance(tol),
|
||||||
|
MaxIterations(maxit),
|
||||||
|
ErrorOnNoConverge(err_on_no_conv){};
|
||||||
|
|
||||||
|
void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
|
||||||
|
Field &psi) {
|
||||||
|
psi.checkerboard = src.checkerboard;
|
||||||
|
conformable(psi, src);
|
||||||
|
|
||||||
|
RealD cp, c, a, d, b, ssq, qq, b_pred;
|
||||||
|
|
||||||
|
Field p(src);
|
||||||
|
Field mmp(src);
|
||||||
|
Field r(src);
|
||||||
|
|
||||||
|
// Initial residual computation & set up
|
||||||
|
RealD guess = norm2(psi);
|
||||||
|
assert(std::isnan(guess) == 0);
|
||||||
|
|
||||||
|
|
||||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
Linop.HermOpAndNorm(psi, mmp, d, b);
|
||||||
|
|
||||||
psi.checkerboard = src.checkerboard;
|
|
||||||
conformable(psi,src);
|
|
||||||
|
|
||||||
RealD cp,c,a,d,b,ssq,qq,b_pred;
|
r = src - mmp;
|
||||||
|
p = r;
|
||||||
|
|
||||||
Field p(src);
|
a = norm2(p);
|
||||||
Field mmp(src);
|
cp = a;
|
||||||
Field r(src);
|
ssq = norm2(src);
|
||||||
|
|
||||||
//Initial residual computation & set up
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
RealD guess = norm2(psi);
|
<< "ConjugateGradient: guess " << guess << std::endl;
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: src " << ssq << std::endl;
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: mp " << d << std::endl;
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: mmp " << b << std::endl;
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: cp,r " << cp << std::endl;
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: p " << a << std::endl;
|
||||||
|
|
||||||
Linop.HermOpAndNorm(psi,mmp,d,b);
|
RealD rsq = Tolerance * Tolerance * ssq;
|
||||||
|
|
||||||
r= src-mmp;
|
// Check if guess is really REALLY good :)
|
||||||
p= r;
|
if (cp <= rsq) {
|
||||||
|
return;
|
||||||
a =norm2(p);
|
|
||||||
cp =a;
|
|
||||||
ssq=norm2(src);
|
|
||||||
|
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
|
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: src "<<ssq <<std::endl;
|
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mp "<<d <<std::endl;
|
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mmp "<<b <<std::endl;
|
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: cp,r "<<cp <<std::endl;
|
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: p "<<a <<std::endl;
|
|
||||||
|
|
||||||
RealD rsq = Tolerance* Tolerance*ssq;
|
|
||||||
|
|
||||||
//Check if guess is really REALLY good :)
|
|
||||||
if ( cp <= rsq ) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" rsq"<<rsq<<std::endl;
|
|
||||||
|
|
||||||
int k;
|
|
||||||
for (k=1;k<=MaxIterations;k++){
|
|
||||||
|
|
||||||
c=cp;
|
|
||||||
|
|
||||||
Linop.HermOpAndNorm(p,mmp,d,qq);
|
|
||||||
|
|
||||||
RealD qqck = norm2(mmp);
|
|
||||||
ComplexD dck = innerProduct(p,mmp);
|
|
||||||
|
|
||||||
a = c/d;
|
|
||||||
b_pred = a*(a*qq-d)/c;
|
|
||||||
|
|
||||||
cp = axpy_norm(r,-a,mmp,r);
|
|
||||||
b = cp/c;
|
|
||||||
|
|
||||||
// Fuse these loops ; should be really easy
|
|
||||||
psi= a*p+psi;
|
|
||||||
p = p*b+r;
|
|
||||||
|
|
||||||
std::cout<<GridLogIterative<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
|
||||||
|
|
||||||
// Stopping condition
|
|
||||||
if ( cp <= rsq ) {
|
|
||||||
|
|
||||||
Linop.HermOpAndNorm(psi,mmp,d,qq);
|
|
||||||
p=mmp-src;
|
|
||||||
|
|
||||||
RealD mmpnorm = sqrt(norm2(mmp));
|
|
||||||
RealD psinorm = sqrt(norm2(psi));
|
|
||||||
RealD srcnorm = sqrt(norm2(src));
|
|
||||||
RealD resnorm = sqrt(norm2(p));
|
|
||||||
RealD true_residual = resnorm/srcnorm;
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage<<"ConjugateGradient: Converged on iteration " <<k
|
|
||||||
<<" computed residual "<<sqrt(cp/ssq)
|
|
||||||
<<" true residual "<<true_residual
|
|
||||||
<<" target "<<Tolerance<<std::endl;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::cout<<GridLogMessage<<"ConjugateGradient did NOT converge"<<std::endl;
|
|
||||||
assert(0);
|
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
|
GridStopWatch LinalgTimer;
|
||||||
|
GridStopWatch MatrixTimer;
|
||||||
|
GridStopWatch SolverTimer;
|
||||||
|
|
||||||
|
SolverTimer.Start();
|
||||||
|
int k;
|
||||||
|
for (k = 1; k <= MaxIterations; k++) {
|
||||||
|
c = cp;
|
||||||
|
|
||||||
|
MatrixTimer.Start();
|
||||||
|
Linop.HermOpAndNorm(p, mmp, d, qq);
|
||||||
|
MatrixTimer.Stop();
|
||||||
|
|
||||||
|
LinalgTimer.Start();
|
||||||
|
// RealD qqck = norm2(mmp);
|
||||||
|
// ComplexD dck = innerProduct(p,mmp);
|
||||||
|
|
||||||
|
a = c / d;
|
||||||
|
b_pred = a * (a * qq - d) / c;
|
||||||
|
|
||||||
|
cp = axpy_norm(r, -a, mmp, r);
|
||||||
|
b = cp / c;
|
||||||
|
|
||||||
|
// Fuse these loops ; should be really easy
|
||||||
|
psi = a * p + psi;
|
||||||
|
p = p * b + r;
|
||||||
|
|
||||||
|
LinalgTimer.Stop();
|
||||||
|
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
|
||||||
|
<< " residual " << cp << " target " << rsq << std::endl;
|
||||||
|
|
||||||
|
// Stopping condition
|
||||||
|
if (cp <= rsq) {
|
||||||
|
SolverTimer.Stop();
|
||||||
|
Linop.HermOpAndNorm(psi, mmp, d, qq);
|
||||||
|
p = mmp - src;
|
||||||
|
|
||||||
|
RealD mmpnorm = sqrt(norm2(mmp));
|
||||||
|
RealD psinorm = sqrt(norm2(psi));
|
||||||
|
RealD srcnorm = sqrt(norm2(src));
|
||||||
|
RealD resnorm = sqrt(norm2(p));
|
||||||
|
RealD true_residual = resnorm / srcnorm;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "ConjugateGradient: Converged on iteration " << k << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq)
|
||||||
|
<< " true residual " << true_residual << " target "
|
||||||
|
<< Tolerance << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Time elapsed: Iterations "
|
||||||
|
<< SolverTimer.Elapsed() << " Matrix "
|
||||||
|
<< MatrixTimer.Elapsed() << " Linalg "
|
||||||
|
<< LinalgTimer.Elapsed();
|
||||||
|
std::cout << std::endl;
|
||||||
|
|
||||||
|
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
|
||||||
|
<< std::endl;
|
||||||
|
if (ErrorOnNoConverge) assert(0);
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
142
lib/algorithms/iterative/ConjugateGradientMixedPrec.h
Normal file
142
lib/algorithms/iterative/ConjugateGradientMixedPrec.h
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/ConjugateGradientMixedPrec.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#ifndef GRID_CONJUGATE_GRADIENT_MIXED_PREC_H
|
||||||
|
#define GRID_CONJUGATE_GRADIENT_MIXED_PREC_H
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
//Mixed precision restarted defect correction CG
|
||||||
|
template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
|
||||||
|
class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
|
||||||
|
public:
|
||||||
|
RealD Tolerance;
|
||||||
|
Integer MaxInnerIterations;
|
||||||
|
Integer MaxOuterIterations;
|
||||||
|
GridBase* SinglePrecGrid; //Grid for single-precision fields
|
||||||
|
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
|
||||||
|
LinearOperatorBase<FieldF> &Linop_f;
|
||||||
|
LinearOperatorBase<FieldD> &Linop_d;
|
||||||
|
|
||||||
|
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
|
||||||
|
LinearFunction<FieldF> *guesser;
|
||||||
|
|
||||||
|
MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) :
|
||||||
|
Linop_f(_Linop_f), Linop_d(_Linop_d),
|
||||||
|
Tolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
|
||||||
|
OuterLoopNormMult(100.), guesser(NULL){ };
|
||||||
|
|
||||||
|
void useGuesser(LinearFunction<FieldF> &g){
|
||||||
|
guesser = &g;
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator() (const FieldD &src_d_in, FieldD &sol_d){
|
||||||
|
GridStopWatch TotalTimer;
|
||||||
|
TotalTimer.Start();
|
||||||
|
|
||||||
|
int cb = src_d_in.checkerboard;
|
||||||
|
sol_d.checkerboard = cb;
|
||||||
|
|
||||||
|
RealD src_norm = norm2(src_d_in);
|
||||||
|
RealD stop = src_norm * Tolerance*Tolerance;
|
||||||
|
|
||||||
|
GridBase* DoublePrecGrid = src_d_in._grid;
|
||||||
|
FieldD tmp_d(DoublePrecGrid);
|
||||||
|
tmp_d.checkerboard = cb;
|
||||||
|
|
||||||
|
FieldD tmp2_d(DoublePrecGrid);
|
||||||
|
tmp2_d.checkerboard = cb;
|
||||||
|
|
||||||
|
FieldD src_d(DoublePrecGrid);
|
||||||
|
src_d = src_d_in; //source for next inner iteration, computed from residual during operation
|
||||||
|
|
||||||
|
RealD inner_tol = Tolerance;
|
||||||
|
|
||||||
|
FieldF src_f(SinglePrecGrid);
|
||||||
|
src_f.checkerboard = cb;
|
||||||
|
|
||||||
|
FieldF sol_f(SinglePrecGrid);
|
||||||
|
sol_f.checkerboard = cb;
|
||||||
|
|
||||||
|
ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations);
|
||||||
|
CG_f.ErrorOnNoConverge = false;
|
||||||
|
|
||||||
|
GridStopWatch InnerCGtimer;
|
||||||
|
|
||||||
|
GridStopWatch PrecChangeTimer;
|
||||||
|
|
||||||
|
for(Integer outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
|
||||||
|
//Compute double precision rsd and also new RHS vector.
|
||||||
|
Linop_d.HermOp(sol_d, tmp_d);
|
||||||
|
RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl;
|
||||||
|
|
||||||
|
if(norm < OuterLoopNormMult * stop){
|
||||||
|
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration converged on iteration " <<outer_iter <<std::endl;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
while(norm * inner_tol * inner_tol < stop) inner_tol *= 2; // inner_tol = sqrt(stop/norm) ??
|
||||||
|
|
||||||
|
PrecChangeTimer.Start();
|
||||||
|
precisionChange(src_f, src_d);
|
||||||
|
PrecChangeTimer.Stop();
|
||||||
|
|
||||||
|
zeroit(sol_f);
|
||||||
|
|
||||||
|
//Optionally improve inner solver guess (eg using known eigenvectors)
|
||||||
|
if(guesser != NULL)
|
||||||
|
(*guesser)(src_f, sol_f);
|
||||||
|
|
||||||
|
//Inner CG
|
||||||
|
CG_f.Tolerance = inner_tol;
|
||||||
|
InnerCGtimer.Start();
|
||||||
|
CG_f(Linop_f, src_f, sol_f);
|
||||||
|
InnerCGtimer.Stop();
|
||||||
|
|
||||||
|
//Convert sol back to double and add to double prec solution
|
||||||
|
PrecChangeTimer.Start();
|
||||||
|
precisionChange(tmp_d, sol_f);
|
||||||
|
PrecChangeTimer.Stop();
|
||||||
|
|
||||||
|
axpy(sol_d, 1.0, tmp_d, sol_d);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Final trial CG
|
||||||
|
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl;
|
||||||
|
|
||||||
|
ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations);
|
||||||
|
CG_d(Linop_d, src_d_in, sol_d);
|
||||||
|
|
||||||
|
TotalTimer.Stop();
|
||||||
|
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -1,3 +1,31 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShift.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_CONJUGATE_MULTI_SHIFT_GRADIENT_H
|
#ifndef GRID_CONJUGATE_MULTI_SHIFT_GRADIENT_H
|
||||||
#define GRID_CONJUGATE_MULTI_SHIFT_GRADIENT_H
|
#define GRID_CONJUGATE_MULTI_SHIFT_GRADIENT_H
|
||||||
|
|
||||||
@ -246,7 +274,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
|||||||
}
|
}
|
||||||
// ugly hack
|
// ugly hack
|
||||||
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
|
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
|
||||||
assert(0);
|
// assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
@ -1,3 +1,31 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/ConjugateResidual.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_CONJUGATE_RESIDUAL_H
|
#ifndef GRID_CONJUGATE_RESIDUAL_H
|
||||||
#define GRID_CONJUGATE_RESIDUAL_H
|
#define GRID_CONJUGATE_RESIDUAL_H
|
||||||
|
|
||||||
|
@ -1,3 +1,31 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/DenseMatrix.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_DENSE_MATRIX_H
|
#ifndef GRID_DENSE_MATRIX_H
|
||||||
#define GRID_DENSE_MATRIX_H
|
#define GRID_DENSE_MATRIX_H
|
||||||
|
|
||||||
@ -102,8 +130,8 @@ DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st,
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#include <algorithms/iterative/Householder.h>
|
#include "Householder.h"
|
||||||
#include <algorithms/iterative/Francis.h>
|
#include "Francis.h"
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/EigenSort.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_EIGENSORT_H
|
#ifndef GRID_EIGENSORT_H
|
||||||
#define GRID_EIGENSORT_H
|
#define GRID_EIGENSORT_H
|
||||||
|
|
||||||
@ -11,32 +38,34 @@ template<class Field>
|
|||||||
class SortEigen {
|
class SortEigen {
|
||||||
private:
|
private:
|
||||||
|
|
||||||
|
//hacking for testing for now
|
||||||
|
private:
|
||||||
static bool less_lmd(RealD left,RealD right){
|
static bool less_lmd(RealD left,RealD right){
|
||||||
return fabs(left) < fabs(right);
|
return left > right;
|
||||||
}
|
}
|
||||||
static bool less_pair(std::pair<RealD,Field>& left,
|
static bool less_pair(std::pair<RealD,Field const*>& left,
|
||||||
std::pair<RealD,Field>& right){
|
std::pair<RealD,Field const*>& right){
|
||||||
return fabs(left.first) < fabs(right.first);
|
return left.first > (right.first);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
void push(DenseVector<RealD>& lmd,
|
void push(DenseVector<RealD>& lmd,
|
||||||
DenseVector<Field>& evec,int N) {
|
DenseVector<Field>& evec,int N) {
|
||||||
|
DenseVector<Field> cpy(lmd.size(),evec[0]._grid);
|
||||||
|
for(int i=0;i<lmd.size();i++) cpy[i] = evec[i];
|
||||||
|
|
||||||
DenseVector<std::pair<RealD, Field> > emod;
|
DenseVector<std::pair<RealD, Field const*> > emod(lmd.size());
|
||||||
typename DenseVector<std::pair<RealD, Field> >::iterator it;
|
for(int i=0;i<lmd.size();++i)
|
||||||
|
emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]);
|
||||||
for(int i=0;i<lmd.size();++i){
|
|
||||||
emod.push_back(std::pair<RealD,Field>(lmd[i],evec[i]));
|
|
||||||
}
|
|
||||||
|
|
||||||
partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair);
|
partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair);
|
||||||
|
|
||||||
it=emod.begin();
|
typename DenseVector<std::pair<RealD, Field const*> >::iterator it = emod.begin();
|
||||||
for(int i=0;i<N;++i){
|
for(int i=0;i<N;++i){
|
||||||
lmd[i]=it->first;
|
lmd[i]=it->first;
|
||||||
evec[i]=it->second;
|
evec[i]=*(it->second);
|
||||||
++it;
|
++it;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/Francis.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef FRANCIS_H
|
#ifndef FRANCIS_H
|
||||||
#define FRANCIS_H
|
#define FRANCIS_H
|
||||||
|
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/Householder.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef HOUSEHOLDER_H
|
#ifndef HOUSEHOLDER_H
|
||||||
#define HOUSEHOLDER_H
|
#define HOUSEHOLDER_H
|
||||||
|
|
||||||
|
@ -1,8 +1,44 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_IRL_H
|
#ifndef GRID_IRL_H
|
||||||
#define GRID_IRL_H
|
#define GRID_IRL_H
|
||||||
|
|
||||||
#include <algorithms/iterative/DenseMatrix.h>
|
#include <string.h> //memset
|
||||||
#include <algorithms/iterative/EigenSort.h>
|
#ifdef USE_LAPACK
|
||||||
|
void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
|
||||||
|
double *vl, double *vu, int *il, int *iu, double *abstol,
|
||||||
|
int *m, double *w, double *z, int *ldz, int *isuppz,
|
||||||
|
double *work, int *lwork, int *iwork, int *liwork,
|
||||||
|
int *info);
|
||||||
|
#endif
|
||||||
|
#include "DenseMatrix.h"
|
||||||
|
#include "EigenSort.h"
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
@ -21,6 +57,7 @@ public:
|
|||||||
int Niter;
|
int Niter;
|
||||||
int converged;
|
int converged;
|
||||||
|
|
||||||
|
int Nstop; // Number of evecs checked for convergence
|
||||||
int Nk; // Number of converged sought
|
int Nk; // Number of converged sought
|
||||||
int Np; // Np -- Number of spare vecs in kryloc space
|
int Np; // Np -- Number of spare vecs in kryloc space
|
||||||
int Nm; // Nm -- total number of vectors
|
int Nm; // Nm -- total number of vectors
|
||||||
@ -29,6 +66,8 @@ public:
|
|||||||
|
|
||||||
SortEigen<Field> _sort;
|
SortEigen<Field> _sort;
|
||||||
|
|
||||||
|
// GridCartesian &_fgrid;
|
||||||
|
|
||||||
LinearOperatorBase<Field> &_Linop;
|
LinearOperatorBase<Field> &_Linop;
|
||||||
|
|
||||||
OperatorFunction<Field> &_poly;
|
OperatorFunction<Field> &_poly;
|
||||||
@ -39,7 +78,27 @@ public:
|
|||||||
void init(void){};
|
void init(void){};
|
||||||
void Abort(int ff, DenseVector<RealD> &evals, DenseVector<DenseVector<RealD> > &evecs);
|
void Abort(int ff, DenseVector<RealD> &evals, DenseVector<DenseVector<RealD> > &evecs);
|
||||||
|
|
||||||
ImplicitlyRestartedLanczos(LinearOperatorBase<Field> &Linop, // op
|
// Constructor with an explicit Nstop. Stores the operator and polynomial by reference, copies the integer/real parameters into the members of the same name, and derives Np = Nm - Nk, which must be positive.
ImplicitlyRestartedLanczos(
|
||||||
|
LinearOperatorBase<Field> &Linop, // operator, bound to _Linop
|
||||||
|
OperatorFunction<Field> & poly, // polynomial, bound to _poly
|
||||||
|
int _Nstop, // number of eigenvectors checked for convergence
|
||||||
|
int _Nk, // number of converged eigenvectors sought
|
||||||
|
int _Nm, // total number of vectors; Nm - Nk of them are the spare ones (Np)
|
||||||
|
RealD _eresid, // stored in eresid; original note read "resid in lmdue deficit" -- TODO confirm exact meaning
|
||||||
|
int _Niter) : // Max iterations
|
||||||
|
_Linop(Linop),
|
||||||
|
_poly(poly),
|
||||||
|
Nstop(_Nstop),
|
||||||
|
Nk(_Nk),
|
||||||
|
Nm(_Nm),
|
||||||
|
eresid(_eresid),
|
||||||
|
Niter(_Niter)
|
||||||
|
{
|
||||||
|
Np = Nm-Nk; assert(Np>0);
|
||||||
|
};
|
||||||
|
|
||||||
|
ImplicitlyRestartedLanczos(
|
||||||
|
LinearOperatorBase<Field> &Linop, // op
|
||||||
OperatorFunction<Field> & poly, // polynmial
|
OperatorFunction<Field> & poly, // polynmial
|
||||||
int _Nk, // sought vecs
|
int _Nk, // sought vecs
|
||||||
int _Nm, // spare vecs
|
int _Nm, // spare vecs
|
||||||
@ -47,6 +106,7 @@ public:
|
|||||||
int _Niter) : // Max iterations
|
int _Niter) : // Max iterations
|
||||||
_Linop(Linop),
|
_Linop(Linop),
|
||||||
_poly(poly),
|
_poly(poly),
|
||||||
|
Nstop(_Nk),
|
||||||
Nk(_Nk),
|
Nk(_Nk),
|
||||||
Nm(_Nm),
|
Nm(_Nm),
|
||||||
eresid(_eresid),
|
eresid(_eresid),
|
||||||
@ -114,10 +174,11 @@ public:
|
|||||||
RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
|
RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
|
||||||
// 7. vk+1 := wk/βk+1
|
// 7. vk+1 := wk/βk+1
|
||||||
|
|
||||||
|
// std::cout << "alpha = " << zalph << " beta "<<beta<<std::endl;
|
||||||
const RealD tiny = 1.0e-20;
|
const RealD tiny = 1.0e-20;
|
||||||
if ( beta < tiny ) {
|
if ( beta < tiny ) {
|
||||||
std::cout << " beta is tiny "<<beta<<std::endl;
|
std::cout << " beta is tiny "<<beta<<std::endl;
|
||||||
}
|
}
|
||||||
lmd[k] = alph;
|
lmd[k] = alph;
|
||||||
lme[k] = beta;
|
lme[k] = beta;
|
||||||
|
|
||||||
@ -191,15 +252,122 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef USE_LAPACK
|
||||||
|
// Diagonalise the N1 x N1 symmetric tridiagonal matrix whose diagonal is
// lmd[0..N1-1] and whose off-diagonal is lme[0..N1-2], using LAPACK dstegr.
//
// The requested index range of eigenpairs is split across processors: each
// rank asks dstegr for the slice [il, iu] only, moves its results to their
// global positions in zero-filled buffers, and the buffers are then combined
// with a global sum.
//
// On return lmd[0..N1-1] holds the eigenvalues in the reverse of dstegr's
// order, and eigenvector number k occupies Qt[k*N2 + 0 .. k*N2 + N1-1].
//
// lmd  : in: diagonal;        out: eigenvalues
// lme  : in: off-diagonal (not modified here)
// N1   : dimension of the tridiagonal problem
// N2   : row stride used when writing into Qt
// Qt   : out: eigenvectors, one per row
// grid : supplies _Nprocessors, _processor and GlobalSumVector
void diagonalize_lapack(DenseVector<RealD>& lmd,
                        DenseVector<RealD>& lme,
                        int N1,
                        int N2,
                        DenseVector<RealD>& Qt,
                        GridBase *grid){
  int NN = N1;
  if (NN <= 0) return;  // nothing to diagonalise; also keeps &buf[0] below well defined

  // Heap-backed, zero-initialised work buffers. The NN x NN eigenvector
  // matrix in particular is too large to place on the stack safely.
  DenseVector<double> evals_tmp(NN, 0.0);
  DenseVector<double> evec_tmp((size_t)NN * (size_t)NN, 0.0);  // row i (NN entries) = eigenvector i
  DenseVector<double> DD(NN, 0.0);                             // diagonal handed to dstegr
  DenseVector<double> EE(NN, 0.0);                             // off-diagonal; last entry is left at zero

  for (int i = 0; i < NN; i++)     DD[i] = lmd[i];
  for (int i = 0; i + 1 < NN; i++) EE[i] = lme[i];

  int evals_found = 0;
  int lwork  = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ;
  int liwork = 3+NN*10 ;
  DenseVector<int>    iwork(liwork, 0);
  DenseVector<double> work(lwork, 0.0);
  DenseVector<int>    isuppz(2*NN, 0);
  char jobz  = 'V'; // calculate evals & evecs
  char range = 'I'; // calculate the il-th through iu-th evals only
  int info = 0;

  // Partition the index range 1..NN into contiguous slices, one per processor.
  int total = grid->_Nprocessors;
  int node  = grid->_processor;
  int interval = (NN/total)+1;
  double vl = 0.0, vu = 0.0;
  int il = interval*node+1 , iu = interval*(node+1);
  if (iu > NN) iu=NN;
  double tol = 0.0;

  if ( il <= NN){
    printf("total=%d node=%d il=%d iu=%d\n",total,node,il,iu);
    LAPACK_dstegr(&jobz, &range, &NN,
                  &DD[0], &EE[0],
                  &vl, &vu, &il, &iu, // vl,vu are ignored for range 'I'
                  &tol,               // tolerance
                  &evals_found, &evals_tmp[0], &evec_tmp[0], &NN,
                  &isuppz[0],
                  &work[0], &lwork, &iwork[0], &liwork,
                  &info);
    if (info != 0) {
      // A failed factorisation would otherwise be summed into every rank's result.
      printf("diagonalize_lapack: dstegr failed on node %d with info=%d\n",node,info);
    }
    assert(info == 0);

    // dstegr packs this rank's results at the front of the buffers. Shift
    // them to global positions il-1..iu-1, walking downwards so that a
    // source slot is never overwritten before it has been read, and clear
    // the vacated slots so the global sum only sees each entry once.
    for (int i = iu-1; i>= il-1; i--){
      int from = i - (il-1);
      printf("node=%d evals_found=%d evals_tmp[%d] = %g\n",node,evals_found, from,evals_tmp[from]);
      evals_tmp[i] = evals_tmp[from];
      if (il>1) evals_tmp[from]=0.;
      for (int j = 0; j< NN; j++){
        evec_tmp[(size_t)i*NN + j] = evec_tmp[(size_t)from*NN + j];
        if (il>1) evec_tmp[(size_t)from*NN + j]=0.;
      }
    }
  }

  // Every rank takes part in the reduction, including ranks that had no slice.
  grid->GlobalSumVector(&evals_tmp[0],NN);
  grid->GlobalSumVector(&evec_tmp[0],NN*NN);

  // cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order.
  for(int i=0;i<NN;i++){
    for(int j=0;j<NN;j++)
      Qt[(NN-1-i)*N2+j]=evec_tmp[(size_t)i*NN + j];
    lmd [NN-1-i]=evals_tmp[i];
  }
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
void diagonalize(DenseVector<RealD>& lmd,
|
void diagonalize(DenseVector<RealD>& lmd,
|
||||||
DenseVector<RealD>& lme,
|
DenseVector<RealD>& lme,
|
||||||
int Nm2,
|
int N2,
|
||||||
int Nm,
|
int N1,
|
||||||
DenseVector<RealD>& Qt)
|
DenseVector<RealD>& Qt,
|
||||||
|
GridBase *grid)
|
||||||
{
|
{
|
||||||
int Niter = 100*Nm;
|
|
||||||
|
#ifdef USE_LAPACK
|
||||||
|
const int check_lapack=0; // just use lapack if 0, check against lapack if 1
|
||||||
|
|
||||||
|
if(!check_lapack)
|
||||||
|
return diagonalize_lapack(lmd,lme,N2,N1,Qt,grid);
|
||||||
|
|
||||||
|
DenseVector <RealD> lmd2(N1);
|
||||||
|
DenseVector <RealD> lme2(N1);
|
||||||
|
DenseVector<RealD> Qt2(N1*N1);
|
||||||
|
for(int k=0; k<N1; ++k){
|
||||||
|
lmd2[k] = lmd[k];
|
||||||
|
lme2[k] = lme[k];
|
||||||
|
}
|
||||||
|
for(int k=0; k<N1*N1; ++k)
|
||||||
|
Qt2[k] = Qt[k];
|
||||||
|
|
||||||
|
// diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int Niter = 100*N1;
|
||||||
int kmin = 1;
|
int kmin = 1;
|
||||||
int kmax = Nk;
|
int kmax = N2;
|
||||||
// (this should be more sophisticated)
|
// (this should be more sophisticated)
|
||||||
|
|
||||||
for(int iter=0; iter<Niter; ++iter){
|
for(int iter=0; iter<Niter; ++iter){
|
||||||
@ -211,7 +379,7 @@ public:
|
|||||||
// (Dsh: shift)
|
// (Dsh: shift)
|
||||||
|
|
||||||
// transformation
|
// transformation
|
||||||
qr_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax);
|
qr_decomp(lmd,lme,N2,N1,Qt,Dsh,kmin,kmax);
|
||||||
|
|
||||||
// Convergence criterion (redef of kmin and kamx)
|
// Convergence criterion (redef of kmin and kamx)
|
||||||
for(int j=kmax-1; j>= kmin; --j){
|
for(int j=kmax-1; j>= kmin; --j){
|
||||||
@ -222,6 +390,23 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
Niter = iter;
|
Niter = iter;
|
||||||
|
#ifdef USE_LAPACK
|
||||||
|
if(check_lapack){
|
||||||
|
const double SMALL=1e-8;
|
||||||
|
diagonalize_lapack(lmd2,lme2,N2,N1,Qt2,grid);
|
||||||
|
DenseVector <RealD> lmd3(N2);
|
||||||
|
for(int k=0; k<N2; ++k) lmd3[k]=lmd[k];
|
||||||
|
_sort.push(lmd3,N2);
|
||||||
|
_sort.push(lmd2,N2);
|
||||||
|
for(int k=0; k<N2; ++k){
|
||||||
|
if (fabs(lmd2[k] - lmd3[k]) >SMALL) std::cout <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl;
|
||||||
|
// if (fabs(lme2[k] - lme[k]) >SMALL) std::cout <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl;
|
||||||
|
}
|
||||||
|
for(int k=0; k<N1*N1; ++k){
|
||||||
|
// if (fabs(Qt2[k] - Qt[k]) >SMALL) std::cout <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
return;
|
return;
|
||||||
|
|
||||||
continued:
|
continued:
|
||||||
@ -237,6 +422,7 @@ public:
|
|||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 1
|
||||||
static RealD normalise(Field& v)
|
static RealD normalise(Field& v)
|
||||||
{
|
{
|
||||||
RealD nn = norm2(v);
|
RealD nn = norm2(v);
|
||||||
@ -298,6 +484,7 @@ until convergence
|
|||||||
{
|
{
|
||||||
|
|
||||||
GridBase *grid = evec[0]._grid;
|
GridBase *grid = evec[0]._grid;
|
||||||
|
assert(grid == src._grid);
|
||||||
|
|
||||||
std::cout << " -- Nk = " << Nk << " Np = "<< Np << std::endl;
|
std::cout << " -- Nk = " << Nk << " Np = "<< Np << std::endl;
|
||||||
std::cout << " -- Nm = " << Nm << std::endl;
|
std::cout << " -- Nm = " << Nm << std::endl;
|
||||||
@ -328,11 +515,21 @@ until convergence
|
|||||||
// (uniform vector) Why not src??
|
// (uniform vector) Why not src??
|
||||||
// evec[0] = 1.0;
|
// evec[0] = 1.0;
|
||||||
evec[0] = src;
|
evec[0] = src;
|
||||||
|
std:: cout <<"norm2(src)= " << norm2(src)<<std::endl;
|
||||||
|
// << src._grid << std::endl;
|
||||||
normalise(evec[0]);
|
normalise(evec[0]);
|
||||||
|
std:: cout <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl;
|
||||||
|
// << evec[0]._grid << std::endl;
|
||||||
|
|
||||||
// Initial Nk steps
|
// Initial Nk steps
|
||||||
for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
|
for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
|
||||||
|
// std:: cout <<"norm2(evec[1])= " << norm2(evec[1]) << std::endl;
|
||||||
|
// std:: cout <<"norm2(evec[2])= " << norm2(evec[2]) << std::endl;
|
||||||
RitzMatrix(evec,Nk);
|
RitzMatrix(evec,Nk);
|
||||||
|
for(int k=0; k<Nk; ++k){
|
||||||
|
// std:: cout <<"eval " << k << " " <<eval[k] << std::endl;
|
||||||
|
// std:: cout <<"lme " << k << " " << lme[k] << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
// Restarting loop begins
|
// Restarting loop begins
|
||||||
for(int iter = 0; iter<Niter; ++iter){
|
for(int iter = 0; iter<Niter; ++iter){
|
||||||
@ -354,20 +551,24 @@ until convergence
|
|||||||
lme2[k] = lme[k+k1-1];
|
lme2[k] = lme[k+k1-1];
|
||||||
}
|
}
|
||||||
setUnit_Qt(Nm,Qt);
|
setUnit_Qt(Nm,Qt);
|
||||||
diagonalize(eval2,lme2,Nm,Nm,Qt);
|
diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
|
||||||
|
|
||||||
// sorting
|
// sorting
|
||||||
_sort.push(eval2,Nm);
|
_sort.push(eval2,Nm);
|
||||||
|
|
||||||
// Implicitly shifted QR transformations
|
// Implicitly shifted QR transformations
|
||||||
setUnit_Qt(Nm,Qt);
|
setUnit_Qt(Nm,Qt);
|
||||||
for(int ip=k2; ip<Nm; ++ip)
|
for(int ip=k2; ip<Nm; ++ip){
|
||||||
|
std::cout << "qr_decomp "<< ip << " "<< eval2[ip] << std::endl;
|
||||||
qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
|
qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
for(int i=0; i<(Nk+1); ++i) B[i] = 0.0;
|
for(int i=0; i<(Nk+1); ++i) B[i] = 0.0;
|
||||||
|
|
||||||
for(int j=k1-1; j<k2+1; ++j){
|
for(int j=k1-1; j<k2+1; ++j){
|
||||||
for(int k=0; k<Nm; ++k){
|
for(int k=0; k<Nm; ++k){
|
||||||
|
B[j].checkerboard = evec[k].checkerboard;
|
||||||
B[j] += Qt[k+Nm*j] * evec[k];
|
B[j] += Qt[k+Nm*j] * evec[k];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -390,21 +591,25 @@ until convergence
|
|||||||
lme2[k] = lme[k];
|
lme2[k] = lme[k];
|
||||||
}
|
}
|
||||||
setUnit_Qt(Nm,Qt);
|
setUnit_Qt(Nm,Qt);
|
||||||
diagonalize(eval2,lme2,Nk,Nm,Qt);
|
diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
|
||||||
|
|
||||||
for(int k = 0; k<Nk; ++k) B[k]=0.0;
|
for(int k = 0; k<Nk; ++k) B[k]=0.0;
|
||||||
|
|
||||||
for(int j = 0; j<Nk; ++j){
|
for(int j = 0; j<Nk; ++j){
|
||||||
for(int k = 0; k<Nk; ++k){
|
for(int k = 0; k<Nk; ++k){
|
||||||
|
B[j].checkerboard = evec[k].checkerboard;
|
||||||
B[j] += Qt[k+j*Nm] * evec[k];
|
B[j] += Qt[k+j*Nm] * evec[k];
|
||||||
}
|
}
|
||||||
|
// std::cout << "norm(B["<<j<<"])="<<norm2(B[j])<<std::endl;
|
||||||
}
|
}
|
||||||
|
// _sort.push(eval2,B,Nk);
|
||||||
|
|
||||||
Nconv = 0;
|
Nconv = 0;
|
||||||
// std::cout << std::setiosflags(std::ios_base::scientific);
|
// std::cout << std::setiosflags(std::ios_base::scientific);
|
||||||
for(int i=0; i<Nk; ++i){
|
for(int i=0; i<Nk; ++i){
|
||||||
|
|
||||||
_poly(_Linop,B[i],v);
|
// _poly(_Linop,B[i],v);
|
||||||
|
_Linop.HermOp(B[i],v);
|
||||||
|
|
||||||
RealD vnum = real(innerProduct(B[i],v)); // HermOp.
|
RealD vnum = real(innerProduct(B[i],v)); // HermOp.
|
||||||
RealD vden = norm2(B[i]);
|
RealD vden = norm2(B[i]);
|
||||||
@ -412,11 +617,13 @@ until convergence
|
|||||||
v -= eval2[i]*B[i];
|
v -= eval2[i]*B[i];
|
||||||
RealD vv = norm2(v);
|
RealD vv = norm2(v);
|
||||||
|
|
||||||
|
std::cout.precision(13);
|
||||||
std::cout << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
|
std::cout << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
|
||||||
std::cout << "eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i];
|
std::cout << "eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i];
|
||||||
std::cout <<" |H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv<< std::endl;
|
std::cout <<" |H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv<< std::endl;
|
||||||
|
|
||||||
if(vv<eresid*eresid){
|
// change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged
|
||||||
|
if((vv<eresid*eresid) && (i == Nconv) ){
|
||||||
Iconv[Nconv] = i;
|
Iconv[Nconv] = i;
|
||||||
++Nconv;
|
++Nconv;
|
||||||
}
|
}
|
||||||
@ -427,7 +634,7 @@ until convergence
|
|||||||
|
|
||||||
std::cout<<" #modes converged: "<<Nconv<<std::endl;
|
std::cout<<" #modes converged: "<<Nconv<<std::endl;
|
||||||
|
|
||||||
if( Nconv>=Nk ){
|
if( Nconv>=Nstop ){
|
||||||
goto converged;
|
goto converged;
|
||||||
}
|
}
|
||||||
} // end of iter loop
|
} // end of iter loop
|
||||||
@ -436,21 +643,20 @@ until convergence
|
|||||||
abort();
|
abort();
|
||||||
|
|
||||||
converged:
|
converged:
|
||||||
// Sorting
|
// Sorting
|
||||||
|
eval.resize(Nconv);
|
||||||
|
evec.resize(Nconv,grid);
|
||||||
|
for(int i=0; i<Nconv; ++i){
|
||||||
|
eval[i] = eval2[Iconv[i]];
|
||||||
|
evec[i] = B[Iconv[i]];
|
||||||
|
}
|
||||||
|
_sort.push(eval,evec,Nconv);
|
||||||
|
|
||||||
eval.clear();
|
std::cout << "\n Converged\n Summary :\n";
|
||||||
evec.clear();
|
std::cout << " -- Iterations = "<< Nconv << "\n";
|
||||||
for(int i=0; i<Nconv; ++i){
|
std::cout << " -- beta(k) = "<< beta_k << "\n";
|
||||||
eval.push_back(eval2[Iconv[i]]);
|
std::cout << " -- Nconv = "<< Nconv << "\n";
|
||||||
evec.push_back(B[Iconv[i]]);
|
}
|
||||||
}
|
|
||||||
_sort.push(eval,evec,Nconv);
|
|
||||||
|
|
||||||
std::cout << "\n Converged\n Summary :\n";
|
|
||||||
std::cout << " -- Iterations = "<< Nconv << "\n";
|
|
||||||
std::cout << " -- beta(k) = "<< beta_k << "\n";
|
|
||||||
std::cout << " -- Nconv = "<< Nconv << "\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////
|
/////////////////////////////////////////////////
|
||||||
// Adapted from Rudy's lanczos factor routine
|
// Adapted from Rudy's lanczos factor routine
|
||||||
@ -997,6 +1203,7 @@ static void Lock(DenseMatrix<T> &H, ///Hess mtx
|
|||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/Matrix.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef MATRIX_H
|
#ifndef MATRIX_H
|
||||||
#define MATRIX_H
|
#define MATRIX_H
|
||||||
|
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/MatrixUtils.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_MATRIX_UTILS_H
|
#ifndef GRID_MATRIX_UTILS_H
|
||||||
#define GRID_MATRIX_UTILS_H
|
#define GRID_MATRIX_UTILS_H
|
||||||
|
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/NormalEquations.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_NORMAL_EQUATIONS_H
|
#ifndef GRID_NORMAL_EQUATIONS_H
|
||||||
#define GRID_NORMAL_EQUATIONS_H
|
#define GRID_NORMAL_EQUATIONS_H
|
||||||
|
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/PrecConjugateResidual.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_PREC_CONJUGATE_RESIDUAL_H
|
#ifndef GRID_PREC_CONJUGATE_RESIDUAL_H
|
||||||
#define GRID_PREC_CONJUGATE_RESIDUAL_H
|
#define GRID_PREC_CONJUGATE_RESIDUAL_H
|
||||||
|
|
||||||
|
@ -1,3 +1,31 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_PREC_GCR_H
|
#ifndef GRID_PREC_GCR_H
|
||||||
#define GRID_PREC_GCR_H
|
#define GRID_PREC_GCR_H
|
||||||
|
|
||||||
@ -19,6 +47,10 @@ namespace Grid {
|
|||||||
int mmax;
|
int mmax;
|
||||||
int nstep;
|
int nstep;
|
||||||
int steps;
|
int steps;
|
||||||
|
GridStopWatch PrecTimer;
|
||||||
|
GridStopWatch MatTimer;
|
||||||
|
GridStopWatch LinalgTimer;
|
||||||
|
|
||||||
LinearFunction<Field> &Preconditioner;
|
LinearFunction<Field> &Preconditioner;
|
||||||
|
|
||||||
PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec,int _mmax,int _nstep) :
|
PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec,int _mmax,int _nstep) :
|
||||||
@ -40,14 +72,24 @@ namespace Grid {
|
|||||||
|
|
||||||
Field r(src._grid);
|
Field r(src._grid);
|
||||||
|
|
||||||
|
PrecTimer.Reset();
|
||||||
|
MatTimer.Reset();
|
||||||
|
LinalgTimer.Reset();
|
||||||
|
|
||||||
|
GridStopWatch SolverTimer;
|
||||||
|
SolverTimer.Start();
|
||||||
|
|
||||||
steps=0;
|
steps=0;
|
||||||
for(int k=0;k<MaxIterations;k++){
|
for(int k=0;k<MaxIterations;k++){
|
||||||
|
|
||||||
cp=GCRnStep(Linop,src,psi,rsq);
|
cp=GCRnStep(Linop,src,psi,rsq);
|
||||||
|
|
||||||
if ( verbose ) std::cout<<GridLogMessage<<"VPGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<std::endl;
|
std::cout<<GridLogMessage<<"VPGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<std::endl;
|
||||||
|
|
||||||
if(cp<rsq) {
|
if(cp<rsq) {
|
||||||
|
|
||||||
|
SolverTimer.Stop();
|
||||||
|
|
||||||
Linop.HermOp(psi,r);
|
Linop.HermOp(psi,r);
|
||||||
axpy(r,-1.0,src,r);
|
axpy(r,-1.0,src,r);
|
||||||
RealD tr = norm2(r);
|
RealD tr = norm2(r);
|
||||||
@ -55,6 +97,11 @@ namespace Grid {
|
|||||||
<< " computed residual "<<sqrt(cp/ssq)
|
<< " computed residual "<<sqrt(cp/ssq)
|
||||||
<< " true residual " <<sqrt(tr/ssq)
|
<< " true residual " <<sqrt(tr/ssq)
|
||||||
<< " target " <<Tolerance <<std::endl;
|
<< " target " <<Tolerance <<std::endl;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Total "<< SolverTimer.Elapsed() <<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Precon "<< PrecTimer.Elapsed() <<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Matrix "<< MatTimer.Elapsed() <<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Linalg "<< LinalgTimer.Elapsed() <<std::endl;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -62,6 +109,7 @@ namespace Grid {
|
|||||||
std::cout<<GridLogMessage<<"Variable Preconditioned GCR did not converge"<<std::endl;
|
std::cout<<GridLogMessage<<"Variable Preconditioned GCR did not converge"<<std::endl;
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
RealD GCRnStep(LinearOperatorBase<Field> &Linop,const Field &src, Field &psi,RealD rsq){
|
RealD GCRnStep(LinearOperatorBase<Field> &Linop,const Field &src, Field &psi,RealD rsq){
|
||||||
|
|
||||||
RealD cp;
|
RealD cp;
|
||||||
@ -88,24 +136,25 @@ namespace Grid {
|
|||||||
// initial guess x0 is taken as nonzero.
|
// initial guess x0 is taken as nonzero.
|
||||||
// r0=src-A x0 = src
|
// r0=src-A x0 = src
|
||||||
//////////////////////////////////
|
//////////////////////////////////
|
||||||
|
MatTimer.Start();
|
||||||
Linop.HermOpAndNorm(psi,Az,zAz,zAAz);
|
Linop.HermOpAndNorm(psi,Az,zAz,zAAz);
|
||||||
|
MatTimer.Stop();
|
||||||
r=src-Az;
|
r=src-Az;
|
||||||
|
|
||||||
/////////////////////
|
/////////////////////
|
||||||
// p = Prec(r)
|
// p = Prec(r)
|
||||||
/////////////////////
|
/////////////////////
|
||||||
|
PrecTimer.Start();
|
||||||
Preconditioner(r,z);
|
Preconditioner(r,z);
|
||||||
|
PrecTimer.Stop();
|
||||||
|
|
||||||
std::cout<<GridLogMessage<< " Preconditioner in " << norm2(r)<<std::endl;
|
MatTimer.Start();
|
||||||
std::cout<<GridLogMessage<< " Preconditioner out " << norm2(z)<<std::endl;
|
|
||||||
|
|
||||||
Linop.HermOp(z,tmp);
|
Linop.HermOp(z,tmp);
|
||||||
|
MatTimer.Stop();
|
||||||
|
|
||||||
std::cout<<GridLogMessage<< " Preconditioner Aout " << norm2(tmp)<<std::endl;
|
|
||||||
ttmp=tmp;
|
ttmp=tmp;
|
||||||
tmp=tmp-r;
|
tmp=tmp-r;
|
||||||
|
|
||||||
std::cout<<GridLogMessage<< " Preconditioner resid " << std::sqrt(norm2(tmp)/norm2(r))<<std::endl;
|
|
||||||
/*
|
/*
|
||||||
std::cout<<GridLogMessage<<r<<std::endl;
|
std::cout<<GridLogMessage<<r<<std::endl;
|
||||||
std::cout<<GridLogMessage<<z<<std::endl;
|
std::cout<<GridLogMessage<<z<<std::endl;
|
||||||
@ -113,7 +162,9 @@ namespace Grid {
|
|||||||
std::cout<<GridLogMessage<<tmp<<std::endl;
|
std::cout<<GridLogMessage<<tmp<<std::endl;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
MatTimer.Start();
|
||||||
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
|
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
|
||||||
|
MatTimer.Stop();
|
||||||
|
|
||||||
//p[0],q[0],qq[0]
|
//p[0],q[0],qq[0]
|
||||||
p[0]= z;
|
p[0]= z;
|
||||||
@ -137,18 +188,22 @@ namespace Grid {
|
|||||||
|
|
||||||
cp = axpy_norm(r,-a,q[peri_k],r);
|
cp = axpy_norm(r,-a,q[peri_k],r);
|
||||||
|
|
||||||
std::cout<<GridLogMessage<< " VPGCR_step resid" <<sqrt(cp/rsq)<<std::endl;
|
|
||||||
if((k==nstep-1)||(cp<rsq)){
|
if((k==nstep-1)||(cp<rsq)){
|
||||||
return cp;
|
return cp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<< " VPGCR_step["<<steps<<"] resid " <<sqrt(cp/rsq)<<std::endl;
|
||||||
|
|
||||||
|
PrecTimer.Start();
|
||||||
Preconditioner(r,z);// solve Az = r
|
Preconditioner(r,z);// solve Az = r
|
||||||
|
PrecTimer.Stop();
|
||||||
|
|
||||||
|
MatTimer.Start();
|
||||||
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
|
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
|
||||||
|
|
||||||
|
|
||||||
Linop.HermOp(z,tmp);
|
Linop.HermOp(z,tmp);
|
||||||
|
MatTimer.Stop();
|
||||||
tmp=tmp-r;
|
tmp=tmp-r;
|
||||||
std::cout<<GridLogMessage<< " Preconditioner resid" <<sqrt(norm2(tmp)/norm2(r))<<std::endl;
|
std::cout<<GridLogMessage<< " Preconditioner resid " <<sqrt(norm2(tmp)/norm2(r))<<std::endl;
|
||||||
|
|
||||||
q[peri_kp]=Az;
|
q[peri_kp]=Az;
|
||||||
p[peri_kp]=z;
|
p[peri_kp]=z;
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/SchurRedBlack.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_SCHUR_RED_BLACK_H
|
#ifndef GRID_SCHUR_RED_BLACK_H
|
||||||
#define GRID_SCHUR_RED_BLACK_H
|
#define GRID_SCHUR_RED_BLACK_H
|
||||||
|
|
||||||
@ -75,6 +102,8 @@ namespace Grid {
|
|||||||
|
|
||||||
pickCheckerboard(Even,src_e,in);
|
pickCheckerboard(Even,src_e,in);
|
||||||
pickCheckerboard(Odd ,src_o,in);
|
pickCheckerboard(Odd ,src_o,in);
|
||||||
|
pickCheckerboard(Even,sol_e,out);
|
||||||
|
pickCheckerboard(Odd ,sol_o,out);
|
||||||
|
|
||||||
/////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////
|
||||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||||
|
@ -1,7 +1,34 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/cartesian/Cartesian_base.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_CARTESIAN_BASE_H
|
#ifndef GRID_CARTESIAN_BASE_H
|
||||||
#define GRID_CARTESIAN_BASE_H
|
#define GRID_CARTESIAN_BASE_H
|
||||||
|
|
||||||
#include <Grid.h>
|
|
||||||
|
|
||||||
namespace Grid{
|
namespace Grid{
|
||||||
|
|
||||||
@ -50,15 +77,12 @@ public:
|
|||||||
// GridCartesian / GridRedBlackCartesian
|
// GridCartesian / GridRedBlackCartesian
|
||||||
////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////
|
||||||
virtual int CheckerBoarded(int dim)=0;
|
virtual int CheckerBoarded(int dim)=0;
|
||||||
virtual int CheckerBoard(std::vector<int> site)=0;
|
virtual int CheckerBoard(std::vector<int> &site)=0;
|
||||||
virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
|
virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
|
||||||
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;
|
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;
|
||||||
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0;
|
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0;
|
||||||
int CheckerBoardFromOindex (int Oindex){
|
virtual int CheckerBoardFromOindex (int Oindex)=0;
|
||||||
std::vector<int> ocoor;
|
virtual int CheckerBoardFromOindexTable (int Oindex)=0;
|
||||||
oCoorFromOindex(ocoor,Oindex);
|
|
||||||
return CheckerBoard(ocoor);
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Local layout calculations
|
// Local layout calculations
|
||||||
@ -79,6 +103,12 @@ public:
|
|||||||
for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
|
for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
|
||||||
return idx;
|
return idx;
|
||||||
}
|
}
|
||||||
|
virtual int iIndex(std::vector<int> &lcoor)
|
||||||
|
{
|
||||||
|
int idx=0;
|
||||||
|
for(int d=0;d<_ndimension;d++) idx+=_istride[d]*(lcoor[d]/_rdimensions[d]);
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
inline int oIndexReduced(std::vector<int> &ocoor)
|
inline int oIndexReduced(std::vector<int> &ocoor)
|
||||||
{
|
{
|
||||||
int idx=0;
|
int idx=0;
|
||||||
@ -87,45 +117,42 @@ public:
|
|||||||
for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*ocoor[d];
|
for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*ocoor[d];
|
||||||
return idx;
|
return idx;
|
||||||
}
|
}
|
||||||
static inline void CoorFromIndex (std::vector<int>& coor,int index,std::vector<int> &dims){
|
|
||||||
int nd= dims.size();
|
|
||||||
coor.resize(nd);
|
|
||||||
for(int d=0;d<nd;d++){
|
|
||||||
coor[d] = index % dims[d];
|
|
||||||
index = index / dims[d];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
inline void oCoorFromOindex (std::vector<int>& coor,int Oindex){
|
inline void oCoorFromOindex (std::vector<int>& coor,int Oindex){
|
||||||
CoorFromIndex(coor,Oindex,_rdimensions);
|
Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions);
|
||||||
}
|
|
||||||
static inline void IndexFromCoor (std::vector<int>& coor,int &index,std::vector<int> &dims){
|
|
||||||
int nd=dims.size();
|
|
||||||
int stride=1;
|
|
||||||
index=0;
|
|
||||||
for(int d=0;d<nd;d++){
|
|
||||||
index = index+stride*coor[d];
|
|
||||||
stride=stride*dims[d];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////
|
||||||
// SIMD lane addressing
|
// SIMD lane addressing
|
||||||
//////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////
|
||||||
inline int iIndex(std::vector<int> &lcoor)
|
|
||||||
{
|
|
||||||
int idx=0;
|
|
||||||
for(int d=0;d<_ndimension;d++) idx+=_istride[d]*(lcoor[d]/_rdimensions[d]);
|
|
||||||
return idx;
|
|
||||||
}
|
|
||||||
inline void iCoorFromIindex(std::vector<int> &coor,int lane)
|
inline void iCoorFromIindex(std::vector<int> &coor,int lane)
|
||||||
{
|
{
|
||||||
CoorFromIndex(coor,lane,_simd_layout);
|
Lexicographic::CoorFromIndex(coor,lane,_simd_layout);
|
||||||
}
|
}
|
||||||
inline int PermuteDim(int dimension){
|
inline int PermuteDim(int dimension){
|
||||||
return _simd_layout[dimension]>1;
|
return _simd_layout[dimension]>1;
|
||||||
}
|
}
|
||||||
inline int PermuteType(int dimension){
|
inline int PermuteType(int dimension){
|
||||||
int permute_type=0;
|
int permute_type=0;
|
||||||
|
//
|
||||||
|
// FIXME:
|
||||||
|
//
|
||||||
|
// Best way to encode this would be to present a mask
|
||||||
|
// for which simd dimensions are rotated, and the rotation
|
||||||
|
// size. If there is only one simd dimension rotated, this is just
|
||||||
|
// a permute.
|
||||||
|
//
|
||||||
|
// Cases: PermuteType == 1,2,4,8
|
||||||
|
// Distance should be either 0,1,2..
|
||||||
|
//
|
||||||
|
if ( _simd_layout[dimension] > 2 ) {
|
||||||
|
for(int d=0;d<_ndimension;d++){
|
||||||
|
if ( d != dimension ) assert ( (_simd_layout[d]==1) );
|
||||||
|
}
|
||||||
|
permute_type = RotateBit; // How to specify distance; this is not just direction.
|
||||||
|
return permute_type;
|
||||||
|
}
|
||||||
|
|
||||||
for(int d=_ndimension-1;d>dimension;d--){
|
for(int d=_ndimension-1;d>dimension;d--){
|
||||||
if (_simd_layout[d]>1 ) permute_type++;
|
if (_simd_layout[d]>1 ) permute_type++;
|
||||||
}
|
}
|
||||||
@ -135,12 +162,12 @@ public:
|
|||||||
// Array sizing queries
|
// Array sizing queries
|
||||||
////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
inline int iSites(void) { return _isites; };
|
inline int iSites(void) const { return _isites; };
|
||||||
inline int Nsimd(void) { return _isites; };// Synonymous with iSites
|
inline int Nsimd(void) const { return _isites; };// Synonymous with iSites
|
||||||
inline int oSites(void) { return _osites; };
|
inline int oSites(void) const { return _osites; };
|
||||||
inline int lSites(void) { return _isites*_osites; };
|
inline int lSites(void) const { return _isites*_osites; };
|
||||||
inline int gSites(void) { return _isites*_osites*_Nprocessors; };
|
inline int gSites(void) const { return _isites*_osites*_Nprocessors; };
|
||||||
inline int Nd (void) { return _ndimension;};
|
inline int Nd (void) const { return _ndimension;};
|
||||||
|
|
||||||
inline const std::vector<int> &FullDimensions(void) { return _fdimensions;};
|
inline const std::vector<int> &FullDimensions(void) { return _fdimensions;};
|
||||||
inline const std::vector<int> &GlobalDimensions(void) { return _gdimensions;};
|
inline const std::vector<int> &GlobalDimensions(void) { return _gdimensions;};
|
||||||
@ -151,7 +178,10 @@ public:
|
|||||||
// Global addressing
|
// Global addressing
|
||||||
////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////
|
||||||
void GlobalIndexToGlobalCoor(int gidx,std::vector<int> &gcoor){
|
void GlobalIndexToGlobalCoor(int gidx,std::vector<int> &gcoor){
|
||||||
CoorFromIndex(gcoor,gidx,_gdimensions);
|
Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions);
|
||||||
|
}
|
||||||
|
void LocalIndexToLocalCoor(int lidx,std::vector<int> &lcoor){
|
||||||
|
Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
|
||||||
}
|
}
|
||||||
void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){
|
void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){
|
||||||
gidx=0;
|
gidx=0;
|
||||||
@ -186,7 +216,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
i_idx= iIndex(cblcoor);// this does not imply divide by 2 on checker dim
|
i_idx= iIndex(cblcoor);// this does not imply divide by 2 on checker dim
|
||||||
o_idx= oIndex(lcoor);// this implies divide by 2 on checkerdim
|
o_idx= oIndex(lcoor); // this implies divide by 2 on checkerdim
|
||||||
}
|
}
|
||||||
|
|
||||||
void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor)
|
void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor)
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/cartesian/Cartesian_full.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_CARTESIAN_FULL_H
|
#ifndef GRID_CARTESIAN_FULL_H
|
||||||
#define GRID_CARTESIAN_FULL_H
|
#define GRID_CARTESIAN_FULL_H
|
||||||
|
|
||||||
@ -12,10 +39,17 @@ class GridCartesian: public GridBase {
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
virtual int CheckerBoardFromOindexTable (int Oindex) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
virtual int CheckerBoardFromOindex (int Oindex)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
virtual int CheckerBoarded(int dim){
|
virtual int CheckerBoarded(int dim){
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
virtual int CheckerBoard(std::vector<int> site){
|
virtual int CheckerBoard(std::vector<int> &site){
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
virtual int CheckerBoardDestination(int cb,int shift,int dim){
|
virtual int CheckerBoardDestination(int cb,int shift,int dim){
|
||||||
|
@ -1,19 +1,41 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/cartesian/Cartesian_red_black.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_CARTESIAN_RED_BLACK_H
|
#ifndef GRID_CARTESIAN_RED_BLACK_H
|
||||||
#define GRID_CARTESIAN_RED_BLACK_H
|
#define GRID_CARTESIAN_RED_BLACK_H
|
||||||
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
static const int CbRed =0;
|
static const int CbRed =0;
|
||||||
static const int CbBlack=1;
|
static const int CbBlack=1;
|
||||||
static const int Even =CbRed;
|
static const int Even =CbRed;
|
||||||
static const int Odd =CbBlack;
|
static const int Odd =CbBlack;
|
||||||
|
|
||||||
// Perhaps these are misplaced and
|
|
||||||
// should be in sparse matrix.
|
|
||||||
// Also should make these a named enum type
|
|
||||||
static const int DaggerNo=0;
|
|
||||||
static const int DaggerYes=1;
|
|
||||||
|
|
||||||
// Specialise this for red black grids storing half the data like a chess board.
|
// Specialise this for red black grids storing half the data like a chess board.
|
||||||
class GridRedBlackCartesian : public GridBase
|
class GridRedBlackCartesian : public GridBase
|
||||||
@ -21,12 +43,13 @@ class GridRedBlackCartesian : public GridBase
|
|||||||
public:
|
public:
|
||||||
std::vector<int> _checker_dim_mask;
|
std::vector<int> _checker_dim_mask;
|
||||||
int _checker_dim;
|
int _checker_dim;
|
||||||
|
std::vector<int> _checker_board;
|
||||||
|
|
||||||
virtual int CheckerBoarded(int dim){
|
virtual int CheckerBoarded(int dim){
|
||||||
if( dim==_checker_dim) return 1;
|
if( dim==_checker_dim) return 1;
|
||||||
else return 0;
|
else return 0;
|
||||||
}
|
}
|
||||||
virtual int CheckerBoard(std::vector<int> site){
|
virtual int CheckerBoard(std::vector<int> &site){
|
||||||
int linear=0;
|
int linear=0;
|
||||||
assert(site.size()==_ndimension);
|
assert(site.size()==_ndimension);
|
||||||
for(int d=0;d<_ndimension;d++){
|
for(int d=0;d<_ndimension;d++){
|
||||||
@ -50,12 +73,20 @@ public:
|
|||||||
// or by looping over x,y,z and multiply rather than computing checkerboard.
|
// or by looping over x,y,z and multiply rather than computing checkerboard.
|
||||||
|
|
||||||
if ( (source_cb+ocb)&1 ) {
|
if ( (source_cb+ocb)&1 ) {
|
||||||
|
|
||||||
return (shift)/2;
|
return (shift)/2;
|
||||||
} else {
|
} else {
|
||||||
return (shift+1)/2;
|
return (shift+1)/2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
virtual int CheckerBoardFromOindexTable (int Oindex) {
|
||||||
|
return _checker_board[Oindex];
|
||||||
|
}
|
||||||
|
virtual int CheckerBoardFromOindex (int Oindex)
|
||||||
|
{
|
||||||
|
std::vector<int> ocoor;
|
||||||
|
oCoorFromOindex(ocoor,Oindex);
|
||||||
|
return CheckerBoard(ocoor);
|
||||||
|
}
|
||||||
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){
|
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){
|
||||||
|
|
||||||
if(dim != _checker_dim) return shift;
|
if(dim != _checker_dim) return shift;
|
||||||
@ -142,9 +173,15 @@ public:
|
|||||||
// Use a reduced simd grid
|
// Use a reduced simd grid
|
||||||
_simd_layout[d] = simd_layout[d];
|
_simd_layout[d] = simd_layout[d];
|
||||||
_rdimensions[d]= _ldimensions[d]/_simd_layout[d];
|
_rdimensions[d]= _ldimensions[d]/_simd_layout[d];
|
||||||
|
assert(_rdimensions[d]>0);
|
||||||
|
|
||||||
// all elements of a simd vector must have same checkerboard.
|
// all elements of a simd vector must have same checkerboard.
|
||||||
if ( simd_layout[d]>1 ) assert((_rdimensions[d]&0x1)==0);
|
// If Ls vectorised, this must still be the case; e.g. dwf rb5d
|
||||||
|
if ( _simd_layout[d]>1 ) {
|
||||||
|
if ( checker_dim_mask[d] ) {
|
||||||
|
assert( (_rdimensions[d]&0x1) == 0 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
_osites *= _rdimensions[d];
|
_osites *= _rdimensions[d];
|
||||||
_isites *= _simd_layout[d];
|
_isites *= _simd_layout[d];
|
||||||
@ -157,6 +194,8 @@ public:
|
|||||||
_ostride[d] = _ostride[d-1]*_rdimensions[d-1];
|
_ostride[d] = _ostride[d-1]*_rdimensions[d-1];
|
||||||
_istride[d] = _istride[d-1]*_simd_layout[d-1];
|
_istride[d] = _istride[d-1]*_simd_layout[d-1];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
@ -178,6 +217,18 @@ public:
|
|||||||
block = block*_rdimensions[d];
|
block = block*_rdimensions[d];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////
|
||||||
|
// Create a checkerboard lookup table
|
||||||
|
////////////////////////////////////////////////
|
||||||
|
int rvol = 1;
|
||||||
|
for(int d=0;d<_ndimension;d++){
|
||||||
|
rvol=rvol * _rdimensions[d];
|
||||||
|
}
|
||||||
|
_checker_board.resize(rvol);
|
||||||
|
for(int osite=0;osite<_osites;osite++){
|
||||||
|
_checker_board[osite] = CheckerBoardFromOindex (osite);
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
protected:
|
protected:
|
||||||
virtual int oIndex(std::vector<int> &coor)
|
virtual int oIndex(std::vector<int> &coor)
|
||||||
@ -190,9 +241,21 @@ protected:
|
|||||||
idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
|
idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return idx;
|
return idx;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
virtual int iIndex(std::vector<int> &lcoor)
|
||||||
|
{
|
||||||
|
int idx=0;
|
||||||
|
for(int d=0;d<_ndimension;d++) {
|
||||||
|
if( d==_checker_dim ) {
|
||||||
|
idx+=_istride[d]*(lcoor[d]/(2*_rdimensions[d]));
|
||||||
|
} else {
|
||||||
|
idx+=_istride[d]*(lcoor[d]/_rdimensions[d]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
124
lib/communicator/Communicator_base.cc
Normal file
124
lib/communicator/Communicator_base.cc
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/communicator/Communicator_base.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include "Grid.h"
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
// Info that is set up once and is independent of the cartesian layout
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
void * CartesianCommunicator::ShmCommBuf;
|
||||||
|
uint64_t CartesianCommunicator::MAX_MPI_SHM_BYTES = 128*1024*1024;
|
||||||
|
|
||||||
|
/////////////////////////////////
|
||||||
|
// Alloc, free shmem region
|
||||||
|
/////////////////////////////////
|
||||||
|
void *CartesianCommunicator::ShmBufferMalloc(size_t bytes){
|
||||||
|
// bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
|
||||||
|
void *ptr = (void *)heap_top;
|
||||||
|
heap_top += bytes;
|
||||||
|
heap_bytes+= bytes;
|
||||||
|
if (heap_bytes >= MAX_MPI_SHM_BYTES) {
|
||||||
|
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
|
||||||
|
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
|
||||||
|
std::cout<< " Current value is " << (MAX_MPI_SHM_BYTES/(1024*1024)) <<std::endl;
|
||||||
|
assert(heap_bytes<MAX_MPI_SHM_BYTES);
|
||||||
|
}
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::ShmBufferFreeAll(void) {
|
||||||
|
heap_top =(size_t)ShmBufferSelf();
|
||||||
|
heap_bytes=0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/////////////////////////////////
|
||||||
|
// Grid information queries
|
||||||
|
/////////////////////////////////
|
||||||
|
int CartesianCommunicator::IsBoss(void) { return _processor==0; };
|
||||||
|
int CartesianCommunicator::BossRank(void) { return 0; };
|
||||||
|
int CartesianCommunicator::ThisRank(void) { return _processor; };
|
||||||
|
const std::vector<int> & CartesianCommunicator::ThisProcessorCoor(void) { return _processor_coor; };
|
||||||
|
const std::vector<int> & CartesianCommunicator::ProcessorGrid(void) { return _processors; };
|
||||||
|
int CartesianCommunicator::ProcessorCount(void) { return _Nprocessors; };
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// very VERY rarely (Log, serial RNG) we need world without a grid
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
void CartesianCommunicator::GlobalSum(ComplexF &c)
|
||||||
|
{
|
||||||
|
GlobalSumVector((float *)&c,2);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSumVector(ComplexF *c,int N)
|
||||||
|
{
|
||||||
|
GlobalSumVector((float *)c,2*N);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSum(ComplexD &c)
|
||||||
|
{
|
||||||
|
GlobalSumVector((double *)&c,2);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
|
||||||
|
{
|
||||||
|
GlobalSumVector((double *)c,2*N);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L)
|
||||||
|
|
||||||
|
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||||
|
void *xmit,
|
||||||
|
int xmit_to_rank,
|
||||||
|
void *recv,
|
||||||
|
int recv_from_rank,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall)
|
||||||
|
{
|
||||||
|
SendToRecvFromComplete(waitall);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::StencilBarrier(void){};
|
||||||
|
|
||||||
|
commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector;
|
||||||
|
|
||||||
|
void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; }
|
||||||
|
|
||||||
|
void *CartesianCommunicator::ShmBuffer(int rank) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::ShmInitGeneric(void){
|
||||||
|
ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES);
|
||||||
|
ShmCommBuf=(void *)&ShmBufStorageVector[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -1,3 +1,31 @@
|
|||||||
|
|
||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/communicator/Communicator_base.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_COMMUNICATOR_BASE_H
|
#ifndef GRID_COMMUNICATOR_BASE_H
|
||||||
#define GRID_COMMUNICATOR_BASE_H
|
#define GRID_COMMUNICATOR_BASE_H
|
||||||
|
|
||||||
@ -7,118 +35,196 @@
|
|||||||
#ifdef GRID_COMMS_MPI
|
#ifdef GRID_COMMS_MPI
|
||||||
#include <mpi.h>
|
#include <mpi.h>
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef GRID_COMMS_MPI3
|
||||||
|
#include <mpi.h>
|
||||||
|
#endif
|
||||||
|
#ifdef GRID_COMMS_MPI3L
|
||||||
|
#include <mpi.h>
|
||||||
|
#endif
|
||||||
|
#ifdef GRID_COMMS_SHMEM
|
||||||
|
#include <mpp/shmem.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
class CartesianCommunicator {
|
class CartesianCommunicator {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
// 65536 ranks per node adequate for now
|
||||||
|
// 128MB shared memory for comms; enough for 48^4 local volume comms
|
||||||
|
// Give external control (command line override?) of this
|
||||||
|
|
||||||
|
static const int MAXLOG2RANKSPERNODE = 16;
|
||||||
|
static uint64_t MAX_MPI_SHM_BYTES;
|
||||||
|
|
||||||
// Communicator should know nothing of the physics grid, only processor grid.
|
// Communicator should know nothing of the physics grid, only processor grid.
|
||||||
|
int _Nprocessors; // How many in all
|
||||||
|
std::vector<int> _processors; // Which dimensions get relayed out over processors lanes.
|
||||||
|
int _processor; // linear processor rank
|
||||||
|
std::vector<int> _processor_coor; // linear processor coordinate
|
||||||
|
unsigned long _ndimension;
|
||||||
|
|
||||||
int _Nprocessors; // How many in all
|
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPI3L)
|
||||||
std::vector<int> _processors; // Which dimensions get relayed out over processors lanes.
|
static MPI_Comm communicator_world;
|
||||||
int _processor; // linear processor rank
|
MPI_Comm communicator;
|
||||||
std::vector<int> _processor_coor; // linear processor coordinate
|
typedef MPI_Request CommsRequest_t;
|
||||||
unsigned long _ndimension;
|
|
||||||
|
|
||||||
#ifdef GRID_COMMS_MPI
|
|
||||||
MPI_Comm communicator;
|
|
||||||
typedef MPI_Request CommsRequest_t;
|
|
||||||
#else
|
#else
|
||||||
typedef int CommsRequest_t;
|
typedef int CommsRequest_t;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Constructor
|
////////////////////////////////////////////////////////////////////
|
||||||
CartesianCommunicator(const std::vector<int> &pdimensions_in);
|
// Helper functionality for SHM Windows common to all other impls
|
||||||
|
////////////////////////////////////////////////////////////////////
|
||||||
|
// Longer term; drop this in favour of a master / slave model with
|
||||||
|
// cartesian communicator on a subset of ranks, slave ranks controlled
|
||||||
|
// by group leader with data xfer via shared memory
|
||||||
|
////////////////////////////////////////////////////////////////////
|
||||||
|
#ifdef GRID_COMMS_MPI3
|
||||||
|
|
||||||
// Wraps MPI_Cart routines
|
static int ShmRank;
|
||||||
void ShiftedRanks(int dim,int shift,int & source, int & dest);
|
static int ShmSize;
|
||||||
int RankFromProcessorCoor(std::vector<int> &coor);
|
static int GroupRank;
|
||||||
void ProcessorCoorFromRank(int rank,std::vector<int> &coor);
|
static int GroupSize;
|
||||||
|
static int WorldRank;
|
||||||
|
static int WorldSize;
|
||||||
|
|
||||||
/////////////////////////////////
|
std::vector<int> WorldDims;
|
||||||
// Grid information queries
|
std::vector<int> GroupDims;
|
||||||
/////////////////////////////////
|
std::vector<int> ShmDims;
|
||||||
int IsBoss(void) { return _processor==0; };
|
|
||||||
int BossRank(void) { return 0; };
|
|
||||||
int ThisRank(void) { return _processor; };
|
|
||||||
const std::vector<int> & ThisProcessorCoor(void) { return _processor_coor; };
|
|
||||||
const std::vector<int> & ProcessorGrid(void) { return _processors; };
|
|
||||||
int ProcessorCount(void) { return _Nprocessors; };
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
std::vector<int> GroupCoor;
|
||||||
// Reduction
|
std::vector<int> ShmCoor;
|
||||||
////////////////////////////////////////////////////////////
|
std::vector<int> WorldCoor;
|
||||||
void GlobalSum(RealF &);
|
|
||||||
void GlobalSumVector(RealF *,int N);
|
|
||||||
|
|
||||||
void GlobalSum(RealD &);
|
static std::vector<int> GroupRanks;
|
||||||
void GlobalSumVector(RealD *,int N);
|
static std::vector<int> MyGroup;
|
||||||
|
static int ShmSetup;
|
||||||
|
static MPI_Win ShmWindow;
|
||||||
|
static MPI_Comm ShmComm;
|
||||||
|
|
||||||
void GlobalSum(uint32_t &);
|
std::vector<int> LexicographicToWorldRank;
|
||||||
|
|
||||||
void GlobalSum(ComplexF &c)
|
static std::vector<void *> ShmCommBufs;
|
||||||
{
|
|
||||||
GlobalSumVector((float *)&c,2);
|
|
||||||
}
|
|
||||||
void GlobalSumVector(ComplexF *c,int N)
|
|
||||||
{
|
|
||||||
GlobalSumVector((float *)c,2*N);
|
|
||||||
}
|
|
||||||
|
|
||||||
void GlobalSum(ComplexD &c)
|
#else
|
||||||
{
|
static void ShmInitGeneric(void);
|
||||||
GlobalSumVector((double *)&c,2);
|
static commVector<uint8_t> ShmBufStorageVector;
|
||||||
}
|
#endif
|
||||||
void GlobalSumVector(ComplexD *c,int N)
|
|
||||||
{
|
|
||||||
GlobalSumVector((double *)c,2*N);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class obj> void GlobalSum(obj &o){
|
/////////////////////////////////
|
||||||
typedef typename obj::scalar_type scalar_type;
|
// Grid information and queries
|
||||||
int words = sizeof(obj)/sizeof(scalar_type);
|
// Implemented in Communicator_base.cc
|
||||||
scalar_type * ptr = (scalar_type *)& o;
|
/////////////////////////////////
|
||||||
GlobalSumVector(ptr,words);
|
static void * ShmCommBuf;
|
||||||
}
|
size_t heap_top;
|
||||||
////////////////////////////////////////////////////////////
|
size_t heap_bytes;
|
||||||
// Face exchange, buffer swap in translational invariant way
|
|
||||||
////////////////////////////////////////////////////////////
|
|
||||||
void SendToRecvFrom(void *xmit,
|
|
||||||
int xmit_to_rank,
|
|
||||||
void *recv,
|
|
||||||
int recv_from_rank,
|
|
||||||
int bytes);
|
|
||||||
|
|
||||||
void RecvFrom(void *recv,
|
void *ShmBufferSelf(void);
|
||||||
int recv_from_rank,
|
void *ShmBuffer(int rank);
|
||||||
int bytes);
|
void *ShmBufferTranslate(int rank,void * local_p);
|
||||||
void SendTo(void *xmit,
|
void *ShmBufferMalloc(size_t bytes);
|
||||||
int xmit_to_rank,
|
void ShmBufferFreeAll(void) ;
|
||||||
int bytes);
|
|
||||||
|
|
||||||
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
////////////////////////////////////////////////
|
||||||
void *xmit,
|
// Must call in Grid startup
|
||||||
int xmit_to_rank,
|
////////////////////////////////////////////////
|
||||||
void *recv,
|
static void Init(int *argc, char ***argv);
|
||||||
int recv_from_rank,
|
|
||||||
int bytes);
|
|
||||||
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////
|
||||||
// Barrier
|
// Constructor of any given grid
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////
|
||||||
void Barrier(void);
|
CartesianCommunicator(const std::vector<int> &pdimensions_in);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Broadcast a buffer and composite larger
|
// Wraps MPI_Cart routines, or implements equivalent on other impls
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////
|
||||||
void Broadcast(int root,void* data, int bytes);
|
void ShiftedRanks(int dim,int shift,int & source, int & dest);
|
||||||
template<class obj> void Broadcast(int root,obj &data)
|
int RankFromProcessorCoor(std::vector<int> &coor);
|
||||||
|
void ProcessorCoorFromRank(int rank,std::vector<int> &coor);
|
||||||
|
|
||||||
|
int IsBoss(void) ;
|
||||||
|
int BossRank(void) ;
|
||||||
|
int ThisRank(void) ;
|
||||||
|
const std::vector<int> & ThisProcessorCoor(void) ;
|
||||||
|
const std::vector<int> & ProcessorGrid(void) ;
|
||||||
|
int ProcessorCount(void) ;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// very VERY rarely (Log, serial RNG) we need world without a grid
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
static int RankWorld(void) ;
|
||||||
|
static void BroadcastWorld(int root,void* data, int bytes);
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
// Reduction
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
void GlobalSum(RealF &);
|
||||||
|
void GlobalSumVector(RealF *,int N);
|
||||||
|
void GlobalSum(RealD &);
|
||||||
|
void GlobalSumVector(RealD *,int N);
|
||||||
|
void GlobalSum(uint32_t &);
|
||||||
|
void GlobalSum(uint64_t &);
|
||||||
|
void GlobalSum(ComplexF &c);
|
||||||
|
void GlobalSumVector(ComplexF *c,int N);
|
||||||
|
void GlobalSum(ComplexD &c);
|
||||||
|
void GlobalSumVector(ComplexD *c,int N);
|
||||||
|
|
||||||
|
template<class obj> void GlobalSum(obj &o){
|
||||||
|
typedef typename obj::scalar_type scalar_type;
|
||||||
|
int words = sizeof(obj)/sizeof(scalar_type);
|
||||||
|
scalar_type * ptr = (scalar_type *)& o;
|
||||||
|
GlobalSumVector(ptr,words);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
// Face exchange, buffer swap in translational invariant way
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
void SendToRecvFrom(void *xmit,
|
||||||
|
int xmit_to_rank,
|
||||||
|
void *recv,
|
||||||
|
int recv_from_rank,
|
||||||
|
int bytes);
|
||||||
|
|
||||||
|
void SendRecvPacket(void *xmit,
|
||||||
|
void *recv,
|
||||||
|
int xmit_to_rank,
|
||||||
|
int recv_from_rank,
|
||||||
|
int bytes);
|
||||||
|
|
||||||
|
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||||
|
void *xmit,
|
||||||
|
int xmit_to_rank,
|
||||||
|
void *recv,
|
||||||
|
int recv_from_rank,
|
||||||
|
int bytes);
|
||||||
|
|
||||||
|
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
|
||||||
|
|
||||||
|
void StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||||
|
void *xmit,
|
||||||
|
int xmit_to_rank,
|
||||||
|
void *recv,
|
||||||
|
int recv_from_rank,
|
||||||
|
int bytes);
|
||||||
|
|
||||||
|
void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
|
||||||
|
void StencilBarrier(void);
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
// Barrier
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
void Barrier(void);
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
// Broadcast a buffer and composite larger
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
void Broadcast(int root,void* data, int bytes);
|
||||||
|
|
||||||
|
template<class obj> void Broadcast(int root,obj &data)
|
||||||
{
|
{
|
||||||
Broadcast(root,(void *)&data,sizeof(data));
|
Broadcast(root,(void *)&data,sizeof(data));
|
||||||
};
|
};
|
||||||
|
|
||||||
static void BroadcastWorld(int root,void* data, int bytes);
|
|
||||||
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,9 +1,51 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/communicator/Communicator_mpi.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#include "Grid.h"
|
#include "Grid.h"
|
||||||
#include <mpi.h>
|
#include <mpi.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
// Should error check all MPI calls.
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Info that is set up once and is independent of the cartesian layout
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
MPI_Comm CartesianCommunicator::communicator_world;
|
||||||
|
|
||||||
|
// Should error check all MPI calls.
|
||||||
|
void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||||
|
int flag;
|
||||||
|
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
||||||
|
if ( !flag ) {
|
||||||
|
MPI_Init(argc,argv);
|
||||||
|
}
|
||||||
|
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
|
||||||
|
ShmInitGeneric();
|
||||||
|
}
|
||||||
|
|
||||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||||
{
|
{
|
||||||
@ -14,7 +56,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
|||||||
_processors = processors;
|
_processors = processors;
|
||||||
_processor_coor.resize(_ndimension);
|
_processor_coor.resize(_ndimension);
|
||||||
|
|
||||||
MPI_Cart_create(MPI_COMM_WORLD, _ndimension,&_processors[0],&periodic[0],1,&communicator);
|
MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator);
|
||||||
MPI_Comm_rank(communicator,&_processor);
|
MPI_Comm_rank(communicator,&_processor);
|
||||||
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
|
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
|
||||||
|
|
||||||
@ -27,11 +69,14 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
|||||||
|
|
||||||
assert(Size==_Nprocessors);
|
assert(Size==_Nprocessors);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
||||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
|
||||||
assert(ierr==0);
|
assert(ierr==0);
|
||||||
}
|
}
|
||||||
|
void CartesianCommunicator::GlobalSum(uint64_t &u){
|
||||||
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
void CartesianCommunicator::GlobalSum(float &f){
|
void CartesianCommunicator::GlobalSum(float &f){
|
||||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
|
||||||
assert(ierr==0);
|
assert(ierr==0);
|
||||||
@ -81,21 +126,22 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
|||||||
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
|
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
|
||||||
SendToRecvFromComplete(reqs);
|
SendToRecvFromComplete(reqs);
|
||||||
}
|
}
|
||||||
void CartesianCommunicator::RecvFrom(void *recv,
|
|
||||||
int from,
|
void CartesianCommunicator::SendRecvPacket(void *xmit,
|
||||||
int bytes)
|
void *recv,
|
||||||
|
int sender,
|
||||||
|
int receiver,
|
||||||
|
int bytes)
|
||||||
{
|
{
|
||||||
MPI_Status stat;
|
MPI_Status stat;
|
||||||
int ierr=MPI_Recv(recv, bytes, MPI_CHAR,from,from,communicator,&stat);
|
assert(sender != receiver);
|
||||||
assert(ierr==0);
|
int tag = sender;
|
||||||
}
|
if ( _processor == sender ) {
|
||||||
void CartesianCommunicator::SendTo(void *xmit,
|
MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
|
||||||
int dest,
|
}
|
||||||
int bytes)
|
if ( _processor == receiver ) {
|
||||||
{
|
MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
|
||||||
int rank = _processor; // used for tag; must know who it comes from
|
}
|
||||||
int ierr = MPI_Send(xmit, bytes, MPI_CHAR,dest,_processor,communicator);
|
|
||||||
assert(ierr==0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Basic Halo comms primitive
|
// Basic Halo comms primitive
|
||||||
@ -123,7 +169,6 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &
|
|||||||
int nreq=list.size();
|
int nreq=list.size();
|
||||||
std::vector<MPI_Status> status(nreq);
|
std::vector<MPI_Status> status(nreq);
|
||||||
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
|
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
|
||||||
|
|
||||||
assert(ierr==0);
|
assert(ierr==0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -142,14 +187,22 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
|||||||
communicator);
|
communicator);
|
||||||
assert(ierr==0);
|
assert(ierr==0);
|
||||||
}
|
}
|
||||||
|
///////////////////////////////////////////////////////
|
||||||
|
// Should only be used prior to Grid Init finished.
|
||||||
|
// Check for this?
|
||||||
|
///////////////////////////////////////////////////////
|
||||||
|
// Rank of the calling process within communicator_world.
int CartesianCommunicator::RankWorld(void){
  int world_rank;
  MPI_Comm_rank(communicator_world, &world_rank);
  return world_rank;
}
|
||||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||||
{
|
{
|
||||||
int ierr= MPI_Bcast(data,
|
int ierr= MPI_Bcast(data,
|
||||||
bytes,
|
bytes,
|
||||||
MPI_BYTE,
|
MPI_BYTE,
|
||||||
root,
|
root,
|
||||||
MPI_COMM_WORLD);
|
communicator_world);
|
||||||
assert(ierr==0);
|
assert(ierr==0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
580
lib/communicator/Communicator_mpi3.cc
Normal file
580
lib/communicator/Communicator_mpi3.cc
Normal file
@ -0,0 +1,580 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/communicator/Communicator_mpi3.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include "Grid.h"
|
||||||
|
#include <mpi.h>
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Info that is set up once and is independent of the cartesian layout
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
int CartesianCommunicator::ShmSetup = 0;
|
||||||
|
|
||||||
|
int CartesianCommunicator::ShmRank;
|
||||||
|
int CartesianCommunicator::ShmSize;
|
||||||
|
int CartesianCommunicator::GroupRank;
|
||||||
|
int CartesianCommunicator::GroupSize;
|
||||||
|
int CartesianCommunicator::WorldRank;
|
||||||
|
int CartesianCommunicator::WorldSize;
|
||||||
|
|
||||||
|
MPI_Comm CartesianCommunicator::communicator_world;
|
||||||
|
MPI_Comm CartesianCommunicator::ShmComm;
|
||||||
|
MPI_Win CartesianCommunicator::ShmWindow;
|
||||||
|
|
||||||
|
std::vector<int> CartesianCommunicator::GroupRanks;
|
||||||
|
std::vector<int> CartesianCommunicator::MyGroup;
|
||||||
|
std::vector<void *> CartesianCommunicator::ShmCommBufs;
|
||||||
|
|
||||||
|
void *CartesianCommunicator::ShmBufferSelf(void)
|
||||||
|
{
|
||||||
|
return ShmCommBufs[ShmRank];
|
||||||
|
}
|
||||||
|
void *CartesianCommunicator::ShmBuffer(int rank)
|
||||||
|
{
|
||||||
|
int gpeer = GroupRanks[rank];
|
||||||
|
if (gpeer == MPI_UNDEFINED){
|
||||||
|
return NULL;
|
||||||
|
} else {
|
||||||
|
return ShmCommBufs[gpeer];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p)
|
||||||
|
{
|
||||||
|
int gpeer = GroupRanks[rank];
|
||||||
|
if (gpeer == MPI_UNDEFINED){
|
||||||
|
return NULL;
|
||||||
|
} else {
|
||||||
|
uint64_t offset = (uint64_t)local_p - (uint64_t)ShmCommBufs[ShmRank];
|
||||||
|
uint64_t remote = (uint64_t)ShmCommBufs[gpeer]+offset;
|
||||||
|
return (void *) remote;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||||
|
int flag;
|
||||||
|
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
||||||
|
if ( !flag ) {
|
||||||
|
MPI_Init(argc,argv);
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
|
||||||
|
MPI_Comm_rank(communicator_world,&WorldRank);
|
||||||
|
MPI_Comm_size(communicator_world,&WorldSize);
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////
|
||||||
|
// Split into groups that can share memory
|
||||||
|
/////////////////////////////////////////////////////////////////////
|
||||||
|
MPI_Comm_split_type(communicator_world, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm);
|
||||||
|
MPI_Comm_rank(ShmComm ,&ShmRank);
|
||||||
|
MPI_Comm_size(ShmComm ,&ShmSize);
|
||||||
|
GroupSize = WorldSize/ShmSize;
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////
|
||||||
|
// find world ranks in our SHM group (i.e. which ranks are on our node)
|
||||||
|
/////////////////////////////////////////////////////////////////////
|
||||||
|
MPI_Group WorldGroup, ShmGroup;
|
||||||
|
MPI_Comm_group (communicator_world, &WorldGroup);
|
||||||
|
MPI_Comm_group (ShmComm, &ShmGroup);
|
||||||
|
|
||||||
|
std::vector<int> world_ranks(WorldSize);
|
||||||
|
GroupRanks.resize(WorldSize);
|
||||||
|
for(int r=0;r<WorldSize;r++) world_ranks[r]=r;
|
||||||
|
|
||||||
|
MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup, &GroupRanks[0]);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
// Identify who is in my group and noninate the leader
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
int g=0;
|
||||||
|
MyGroup.resize(ShmSize);
|
||||||
|
for(int rank=0;rank<WorldSize;rank++){
|
||||||
|
if(GroupRanks[rank]!=MPI_UNDEFINED){
|
||||||
|
assert(g<ShmSize);
|
||||||
|
MyGroup[g++] = rank;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::sort(MyGroup.begin(),MyGroup.end(),std::less<int>());
|
||||||
|
int myleader = MyGroup[0];
|
||||||
|
|
||||||
|
std::vector<int> leaders_1hot(WorldSize,0);
|
||||||
|
std::vector<int> leaders_group(GroupSize,0);
|
||||||
|
leaders_1hot [ myleader ] = 1;
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
// global sum leaders over comm world
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator_world);
|
||||||
|
assert(ierr==0);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
// find the group leaders world rank
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
int group=0;
|
||||||
|
for(int l=0;l<WorldSize;l++){
|
||||||
|
if(leaders_1hot[l]){
|
||||||
|
leaders_group[group++] = l;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
// Identify the rank of the group in which I (and my leader) live
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
GroupRank=-1;
|
||||||
|
for(int g=0;g<GroupSize;g++){
|
||||||
|
if (myleader == leaders_group[g]){
|
||||||
|
GroupRank=g;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert(GroupRank!=-1);
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// allocate the shared window for our group
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
ShmCommBuf = 0;
|
||||||
|
ierr = MPI_Win_allocate_shared(MAX_MPI_SHM_BYTES,1,MPI_INFO_NULL,ShmComm,&ShmCommBuf,&ShmWindow);
|
||||||
|
assert(ierr==0);
|
||||||
|
// KNL hack -- force to numa-domain 1 in flat
|
||||||
|
#if 0
|
||||||
|
//#include <numaif.h>
|
||||||
|
for(uint64_t page=0;page<MAX_MPI_SHM_BYTES;page+=4096){
|
||||||
|
void *pages = (void *) ( page + ShmCommBuf );
|
||||||
|
int status;
|
||||||
|
int flags=MPOL_MF_MOVE_ALL;
|
||||||
|
int nodes=1; // numa domain == MCDRAM
|
||||||
|
unsigned long count=1;
|
||||||
|
ierr= move_pages(0,count, &pages,&nodes,&status,flags);
|
||||||
|
if (ierr && (page==0)) perror("numa relocate command failed");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
MPI_Win_lock_all (MPI_MODE_NOCHECK, ShmWindow);
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Plan: allocate a fixed SHM region. Scratch that is just used via some scheme during stencil comms, with no allocate free.
|
||||||
|
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
ShmCommBufs.resize(ShmSize);
|
||||||
|
for(int r=0;r<ShmSize;r++){
|
||||||
|
MPI_Aint sz;
|
||||||
|
int dsp_unit;
|
||||||
|
MPI_Win_shared_query (ShmWindow, r, &sz, &dsp_unit, &ShmCommBufs[r]);
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Verbose for now
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
if (WorldRank == 0){
|
||||||
|
std::cout<<GridLogMessage<< "Grid MPI-3 configuration: detected ";
|
||||||
|
std::cout<< WorldSize << " Ranks " ;
|
||||||
|
std::cout<< GroupSize << " Nodes " ;
|
||||||
|
std::cout<< ShmSize << " with ranks-per-node "<<std::endl;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage <<"Grid MPI-3 configuration: allocated shared memory region of size ";
|
||||||
|
std::cout<<std::hex << MAX_MPI_SHM_BYTES <<" ShmCommBuf address = "<<ShmCommBuf << std::dec<<std::endl;
|
||||||
|
|
||||||
|
for(int g=0;g<GroupSize;g++){
|
||||||
|
std::cout<<GridLogMessage<<" Node "<<g<<" led by MPI rank "<<leaders_group[g]<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<<" Boss Node Shm Pointers are {";
|
||||||
|
for(int g=0;g<ShmSize;g++){
|
||||||
|
std::cout<<std::hex<<ShmCommBufs[g]<<std::dec;
|
||||||
|
if(g!=ShmSize-1) std::cout<<",";
|
||||||
|
else std::cout<<"}"<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int g=0;g<GroupSize;g++){
|
||||||
|
if ( (ShmRank == 0) && (GroupRank==g) ) std::cout<<GridLogMessage<<"["<<g<<"] Node Group "<<g<<" is ranks {";
|
||||||
|
for(int r=0;r<ShmSize;r++){
|
||||||
|
if ( (ShmRank == 0) && (GroupRank==g) ) {
|
||||||
|
std::cout<<MyGroup[r];
|
||||||
|
if(r<ShmSize-1) std::cout<<",";
|
||||||
|
else std::cout<<"}"<<std::endl;
|
||||||
|
}
|
||||||
|
MPI_Barrier(communicator_world);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(ShmSetup==0); ShmSetup=1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Want to implement some magic ... Group sub-cubes into those on same node
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
||||||
|
{
|
||||||
|
std::vector<int> coor = _processor_coor;
|
||||||
|
|
||||||
|
assert(std::abs(shift) <_processors[dim]);
|
||||||
|
|
||||||
|
coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim];
|
||||||
|
Lexicographic::IndexFromCoor(coor,source,_processors);
|
||||||
|
source = LexicographicToWorldRank[source];
|
||||||
|
|
||||||
|
coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim];
|
||||||
|
Lexicographic::IndexFromCoor(coor,dest,_processors);
|
||||||
|
dest = LexicographicToWorldRank[dest];
|
||||||
|
}
|
||||||
|
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
|
||||||
|
{
|
||||||
|
int rank;
|
||||||
|
Lexicographic::IndexFromCoor(coor,rank,_processors);
|
||||||
|
rank = LexicographicToWorldRank[rank];
|
||||||
|
return rank;
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
|
||||||
|
{
|
||||||
|
Lexicographic::CoorFromIndex(coor,rank,_processors);
|
||||||
|
rank = LexicographicToWorldRank[rank];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build the processor-grid view for a grid of `processors` ranks per
// dimension, on top of the static state prepared by Init().
//
// The ranks sharing a node are laid out as a sub-block of the processor grid:
// ShmSize (required to be a power of two) is factored into ShmDims by handing
// out factors of two round-robin over the dimensions that can still take one.
// GroupDims is the grid of nodes, and every process derives its coordinate
// from (GroupRank, ShmRank). A global sum then gives every process the full
// lexicographic-index -> world-rank table.
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
  int ierr;

  communicator=communicator_world;

  _ndimension = processors.size();

  ////////////////////////////////////////////////////////////////
  // Assert power of two shm_size.
  ////////////////////////////////////////////////////////////////
  int log2size = -1;
  for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){
    if ( (0x1<<i) == ShmSize ) {
      log2size = i;
      break;
    }
  }
  assert(log2size != -1);

  ////////////////////////////////////////////////////////////////
  // Identify subblock of ranks on node spreading across dims
  // in a maximally symmetrical way
  ////////////////////////////////////////////////////////////////
  int dim = 0;

  std::vector<int> WorldDims = processors;

  ShmDims.resize(_ndimension,1);
  GroupDims.resize(_ndimension);

  ShmCoor.resize(_ndimension);
  GroupCoor.resize(_ndimension);
  WorldCoor.resize(_ndimension);

  for(int l2=0;l2<log2size;l2++){
    // Skip dimensions that cannot take another factor of two. If a full sweep
    // finds none, the requested grid cannot host ShmSize ranks per node;
    // assert instead of spinning forever.
    int skipped = 0;
    while ( WorldDims[dim] / ShmDims[dim] <= 1 ) {
      skipped++;
      assert(skipped < _ndimension);
      dim=(dim+1)%_ndimension;
    }
    ShmDims[dim]*=2;
    dim=(dim+1)%_ndimension;
  }

  ////////////////////////////////////////////////////////////////
  // Establish torus of processes and nodes with sub-blockings
  ////////////////////////////////////////////////////////////////
  for(int d=0;d<_ndimension;d++){
    GroupDims[d] = WorldDims[d]/ShmDims[d];
  }

  ////////////////////////////////////////////////////////////////
  // Check processor counts match
  ////////////////////////////////////////////////////////////////
  _Nprocessors=1;
  _processors = processors;
  _processor_coor.resize(_ndimension);
  for(int i=0;i<_ndimension;i++){
    _Nprocessors*=_processors[i];
  }
  assert(WorldSize==_Nprocessors);

  ////////////////////////////////////////////////////////////////
  // Establish mapping between lexico physics coord and WorldRank
  ////////////////////////////////////////////////////////////////
  LexicographicToWorldRank.resize(WorldSize,0);
  Lexicographic::CoorFromIndex(GroupCoor,GroupRank,GroupDims);
  Lexicographic::CoorFromIndex(ShmCoor,ShmRank,ShmDims);
  for(int d=0;d<_ndimension;d++){
    WorldCoor[d] = GroupCoor[d]*ShmDims[d]+ShmCoor[d];
  }
  _processor_coor = WorldCoor;

  int lexico;
  Lexicographic::IndexFromCoor(WorldCoor,lexico,WorldDims);
  LexicographicToWorldRank[lexico]=WorldRank;

  // _processor must be the world rank. It is used as the MPI tag that peers
  // match against `from`, and as the index in GroupRanks[_processor], which
  // the stencil code asserts equals ShmRank. The lexicographic index differs
  // from the world rank whenever the node sub-blocking is non-trivial.
  _processor = WorldRank;

  ///////////////////////////////////////////////////////////////////
  // global sum Lexico to World mapping
  ///////////////////////////////////////////////////////////////////
  // Each process filled in only its own slot; the rest are zero, so summing
  // assembles the complete table on every rank.
  ierr=MPI_Allreduce(MPI_IN_PLACE,&LexicographicToWorldRank[0],WorldSize,MPI_INT,MPI_SUM,communicator);
  assert(ierr==0);
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
||||||
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSum(uint64_t &u){
|
||||||
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSum(float &f){
|
||||||
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSumVector(float *f,int N)
|
||||||
|
{
|
||||||
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSum(double &d)
|
||||||
|
{
|
||||||
|
int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSumVector(double *d,int N)
|
||||||
|
{
|
||||||
|
int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Basic Halo comms primitive
|
||||||
|
void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
||||||
|
int dest,
|
||||||
|
void *recv,
|
||||||
|
int from,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
std::vector<CommsRequest_t> reqs(0);
|
||||||
|
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
|
||||||
|
SendToRecvFromComplete(reqs);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::SendRecvPacket(void *xmit,
|
||||||
|
void *recv,
|
||||||
|
int sender,
|
||||||
|
int receiver,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
MPI_Status stat;
|
||||||
|
assert(sender != receiver);
|
||||||
|
int tag = sender;
|
||||||
|
if ( _processor == sender ) {
|
||||||
|
MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
|
||||||
|
}
|
||||||
|
if ( _processor == receiver ) {
|
||||||
|
MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Basic Halo comms primitive
|
||||||
|
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||||
|
void *xmit,
|
||||||
|
int dest,
|
||||||
|
void *recv,
|
||||||
|
int from,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
#if 0
|
||||||
|
this->StencilBarrier();
|
||||||
|
|
||||||
|
MPI_Request xrq;
|
||||||
|
MPI_Request rrq;
|
||||||
|
|
||||||
|
static int sequence;
|
||||||
|
|
||||||
|
int ierr;
|
||||||
|
int tag;
|
||||||
|
int check;
|
||||||
|
|
||||||
|
assert(dest != _processor);
|
||||||
|
assert(from != _processor);
|
||||||
|
|
||||||
|
int gdest = GroupRanks[dest];
|
||||||
|
int gfrom = GroupRanks[from];
|
||||||
|
int gme = GroupRanks[_processor];
|
||||||
|
|
||||||
|
sequence++;
|
||||||
|
|
||||||
|
char *from_ptr = (char *)ShmCommBufs[ShmRank];
|
||||||
|
|
||||||
|
int small = (bytes<MAX_MPI_SHM_BYTES);
|
||||||
|
|
||||||
|
typedef uint64_t T;
|
||||||
|
int words = bytes/sizeof(T);
|
||||||
|
|
||||||
|
assert(((size_t)bytes &(sizeof(T)-1))==0);
|
||||||
|
assert(gme == ShmRank);
|
||||||
|
|
||||||
|
if ( small && (gdest !=MPI_UNDEFINED) ) {
|
||||||
|
|
||||||
|
char *to_ptr = (char *)ShmCommBufs[gdest];
|
||||||
|
|
||||||
|
assert(gme != gdest);
|
||||||
|
|
||||||
|
T *ip = (T *)xmit;
|
||||||
|
T *op = (T *)to_ptr;
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int w=0;w<words;w++) {
|
||||||
|
op[w]=ip[w];
|
||||||
|
}
|
||||||
|
|
||||||
|
bcopy(&_processor,&to_ptr[bytes],sizeof(_processor));
|
||||||
|
bcopy(& sequence,&to_ptr[bytes+4],sizeof(sequence));
|
||||||
|
} else {
|
||||||
|
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
||||||
|
assert(ierr==0);
|
||||||
|
list.push_back(xrq);
|
||||||
|
}
|
||||||
|
|
||||||
|
this->StencilBarrier();
|
||||||
|
|
||||||
|
if (small && (gfrom !=MPI_UNDEFINED) ) {
|
||||||
|
T *ip = (T *)from_ptr;
|
||||||
|
T *op = (T *)recv;
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int w=0;w<words;w++) {
|
||||||
|
op[w]=ip[w];
|
||||||
|
}
|
||||||
|
bcopy(&from_ptr[bytes] ,&tag ,sizeof(tag));
|
||||||
|
bcopy(&from_ptr[bytes+4],&check,sizeof(check));
|
||||||
|
assert(check==sequence);
|
||||||
|
assert(tag==from);
|
||||||
|
} else {
|
||||||
|
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
|
||||||
|
assert(ierr==0);
|
||||||
|
list.push_back(rrq);
|
||||||
|
}
|
||||||
|
|
||||||
|
this->StencilBarrier();
|
||||||
|
|
||||||
|
#else
|
||||||
|
MPI_Request xrq;
|
||||||
|
MPI_Request rrq;
|
||||||
|
int rank = _processor;
|
||||||
|
int ierr;
|
||||||
|
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
||||||
|
ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
|
||||||
|
|
||||||
|
assert(ierr==0);
|
||||||
|
|
||||||
|
list.push_back(xrq);
|
||||||
|
list.push_back(rrq);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||||
|
void *xmit,
|
||||||
|
int dest,
|
||||||
|
void *recv,
|
||||||
|
int from,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
MPI_Request xrq;
|
||||||
|
MPI_Request rrq;
|
||||||
|
|
||||||
|
int ierr;
|
||||||
|
|
||||||
|
assert(dest != _processor);
|
||||||
|
assert(from != _processor);
|
||||||
|
|
||||||
|
int gdest = GroupRanks[dest];
|
||||||
|
int gfrom = GroupRanks[from];
|
||||||
|
int gme = GroupRanks[_processor];
|
||||||
|
|
||||||
|
assert(gme == ShmRank);
|
||||||
|
|
||||||
|
if ( gdest == MPI_UNDEFINED ) {
|
||||||
|
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
||||||
|
assert(ierr==0);
|
||||||
|
list.push_back(xrq);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( gfrom ==MPI_UNDEFINED) {
|
||||||
|
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
|
||||||
|
assert(ierr==0);
|
||||||
|
list.push_back(rrq);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||||
|
{
|
||||||
|
SendToRecvFromComplete(list);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::StencilBarrier(void)
|
||||||
|
{
|
||||||
|
MPI_Win_sync (ShmWindow);
|
||||||
|
MPI_Barrier (ShmComm);
|
||||||
|
MPI_Win_sync (ShmWindow);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||||
|
{
|
||||||
|
int nreq=list.size();
|
||||||
|
std::vector<MPI_Status> status(nreq);
|
||||||
|
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::Barrier(void)
|
||||||
|
{
|
||||||
|
int ierr = MPI_Barrier(communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
||||||
|
{
|
||||||
|
int ierr=MPI_Bcast(data,
|
||||||
|
bytes,
|
||||||
|
MPI_BYTE,
|
||||||
|
root,
|
||||||
|
communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||||
|
{
|
||||||
|
int ierr= MPI_Bcast(data,
|
||||||
|
bytes,
|
||||||
|
MPI_BYTE,
|
||||||
|
root,
|
||||||
|
communicator_world);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
874
lib/communicator/Communicator_mpi3_leader.cc
Normal file
874
lib/communicator/Communicator_mpi3_leader.cc
Normal file
@ -0,0 +1,874 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/communicator/Communicator_mpi3_leader.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include "Grid.h"
|
||||||
|
#include <mpi.h>
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Workarounds:
|
||||||
|
/// i) bloody mac os doesn't implement unnamed semaphores since it is "optional" posix.
|
||||||
|
/// darwin dispatch semaphores don't seem to be multiprocess.
|
||||||
|
///
|
||||||
|
/// ii) openmpi under --mca shmem posix works with two squadrons per node;
|
||||||
|
/// openmpi under default mca settings (I think --mca shmem mmap) on MacOS makes two squadrons map the SAME
|
||||||
|
/// memory as each other, despite their living on different communicators. This appears to be a bug in OpenMPI.
|
||||||
|
///
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
#include <semaphore.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
|
typedef sem_t *Grid_semaphore;
|
||||||
|
|
||||||
|
// Named-semaphore helpers. SEM_INIT and SEM_INIT_EXCL expect a character
// buffer called `sem_name` to be in scope at the point of use.
//
// The semaphore calls are made outside assert(), so they still run when the
// code is built with NDEBUG; only the result check is compiled out. Each
// macro is a single do { } while(0) statement, so it is safe under an
// unbraced if/else.

// Open an already existing semaphore called sem_name.
#define SEM_INIT(S) \
  do { S = sem_open(sem_name,0,0600,0); assert ( S != SEM_FAILED ); } while(0)

// Remove any stale semaphore called sem_name, then create it afresh at 0.
#define SEM_INIT_EXCL(S) \
  do { sem_unlink(sem_name); S = sem_open(sem_name,O_CREAT|O_EXCL,0600,0); assert ( S != SEM_FAILED ); } while(0)

// Post (increment) the semaphore.
#define SEM_POST(S) \
  do { int sem_rc_ = sem_post(S); assert ( sem_rc_ == 0 ); (void)sem_rc_; } while(0)

// Wait on (decrement) the semaphore.
#define SEM_WAIT(S) \
  do { int sem_rc_ = sem_wait(S); assert ( sem_rc_ == 0 ); (void)sem_rc_; } while(0)
|
||||||
|
|
||||||
|
#include <sys/mman.h>
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
// Command codes passed as the `command` argument of Slave::QueueCommand
// (presumably what ends up in Descriptor::command; confirm against the
// QueueCommand definition).
enum { COMMAND_ISEND, COMMAND_IRECV, COMMAND_WAITALL };
|
||||||
|
|
||||||
|
struct Descriptor {
|
||||||
|
uint64_t buf;
|
||||||
|
size_t bytes;
|
||||||
|
int rank;
|
||||||
|
int tag;
|
||||||
|
int command;
|
||||||
|
MPI_Request request;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Number of Descriptor slots in each SlaveState::Descrs array.
const int pool = 48;
|
||||||
|
|
||||||
|
class SlaveState {
|
||||||
|
public:
|
||||||
|
volatile int head;
|
||||||
|
volatile int start;
|
||||||
|
volatile int tail;
|
||||||
|
volatile Descriptor Descrs[pool];
|
||||||
|
};
|
||||||
|
|
||||||
|
class Slave {
|
||||||
|
public:
|
||||||
|
Grid_semaphore sem_head;
|
||||||
|
Grid_semaphore sem_tail;
|
||||||
|
SlaveState *state;
|
||||||
|
MPI_Comm squadron;
|
||||||
|
uint64_t base;
|
||||||
|
int universe_rank;
|
||||||
|
int vertical_rank;
|
||||||
|
char sem_name [NAME_MAX];
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
// Descriptor circular pointers
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
Slave() {};
|
||||||
|
|
||||||
|
void Init(SlaveState * _state,MPI_Comm _squadron,int _universe_rank,int _vertical_rank);
|
||||||
|
|
||||||
|
void SemInit(void) {
|
||||||
|
sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank);
|
||||||
|
// printf("SEM_NAME: %s \n",sem_name);
|
||||||
|
SEM_INIT(sem_head);
|
||||||
|
sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank);
|
||||||
|
// printf("SEM_NAME: %s \n",sem_name);
|
||||||
|
SEM_INIT(sem_tail);
|
||||||
|
}
|
||||||
|
void SemInitExcl(void) {
|
||||||
|
sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank);
|
||||||
|
// printf("SEM_INIT_EXCL: %s \n",sem_name);
|
||||||
|
SEM_INIT_EXCL(sem_head);
|
||||||
|
sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank);
|
||||||
|
// printf("SEM_INIT_EXCL: %s \n",sem_name);
|
||||||
|
SEM_INIT_EXCL(sem_tail);
|
||||||
|
}
|
||||||
|
void WakeUpDMA(void) {
|
||||||
|
SEM_POST(sem_head);
|
||||||
|
};
|
||||||
|
void WakeUpCompute(void) {
|
||||||
|
SEM_POST(sem_tail);
|
||||||
|
};
|
||||||
|
void WaitForCommand(void) {
|
||||||
|
SEM_WAIT(sem_head);
|
||||||
|
};
|
||||||
|
void WaitForComplete(void) {
|
||||||
|
SEM_WAIT(sem_tail);
|
||||||
|
};
|
||||||
|
void EventLoop (void) {
|
||||||
|
// std::cout<< " Entering event loop "<<std::endl;
|
||||||
|
while(1){
|
||||||
|
WaitForCommand();
|
||||||
|
// std::cout << "Getting command "<<std::endl;
|
||||||
|
Event();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int Event (void) ;
|
||||||
|
|
||||||
|
uint64_t QueueCommand(int command,void *buf, int bytes, int hashtag, MPI_Comm comm,int u_rank) ;
|
||||||
|
|
||||||
|
void WaitAll() {
|
||||||
|
// std::cout << "Queueing WAIT command "<<std::endl;
|
||||||
|
QueueCommand(COMMAND_WAITALL,0,0,0,squadron,0);
|
||||||
|
// std::cout << "Waking up DMA "<<std::endl;
|
||||||
|
WakeUpDMA();
|
||||||
|
// std::cout << "Waiting from semaphore "<<std::endl;
|
||||||
|
WaitForComplete();
|
||||||
|
// std::cout << "Checking FIFO is empty "<<std::endl;
|
||||||
|
assert ( state->tail == state->head );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
// One instance of a data mover.
|
||||||
|
// Master and Slave must agree on location in shared memory
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
class MPIoffloadEngine {
|
||||||
|
public:
|
||||||
|
|
||||||
|
static std::vector<Slave> Slaves;
|
||||||
|
|
||||||
|
static int ShmSetup;
|
||||||
|
|
||||||
|
static int UniverseRank;
|
||||||
|
static int UniverseSize;
|
||||||
|
|
||||||
|
static MPI_Comm communicator_universe;
|
||||||
|
static MPI_Comm communicator_cached;
|
||||||
|
|
||||||
|
static MPI_Comm HorizontalComm;
|
||||||
|
static int HorizontalRank;
|
||||||
|
static int HorizontalSize;
|
||||||
|
|
||||||
|
static MPI_Comm VerticalComm;
|
||||||
|
static MPI_Win VerticalWindow;
|
||||||
|
static int VerticalSize;
|
||||||
|
static int VerticalRank;
|
||||||
|
|
||||||
|
static std::vector<void *> VerticalShmBufs;
|
||||||
|
static std::vector<std::vector<int> > UniverseRanks;
|
||||||
|
static std::vector<int> UserCommunicatorToWorldRanks;
|
||||||
|
|
||||||
|
static MPI_Group WorldGroup, CachedGroup;
|
||||||
|
|
||||||
|
static void CommunicatorInit (MPI_Comm &communicator_world,
|
||||||
|
MPI_Comm &ShmComm,
|
||||||
|
void * &ShmCommBuf);
|
||||||
|
|
||||||
|
static void MapCommRankToWorldRank(int &hashtag, int & comm_world_peer,int tag, MPI_Comm comm,int commrank);
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////
|
||||||
|
// routines for master proc must handle any communicator
|
||||||
|
/////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
static void QueueSend(int slave,void *buf, int bytes, int tag, MPI_Comm comm,int rank) {
|
||||||
|
// std::cout<< " Queueing send "<< bytes<< " slave "<< slave << " to comm "<<rank <<std::endl;
|
||||||
|
Slaves[slave].QueueCommand(COMMAND_ISEND,buf,bytes,tag,comm,rank);
|
||||||
|
// std::cout << "Queued send command to rank "<< rank<< " via "<<slave <<std::endl;
|
||||||
|
Slaves[slave].WakeUpDMA();
|
||||||
|
// std::cout << "Waking up DMA "<< slave<<std::endl;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void QueueRecv(int slave, void *buf, int bytes, int tag, MPI_Comm comm,int rank) {
|
||||||
|
// std::cout<< " Queueing recv "<< bytes<< " slave "<< slave << " from comm "<<rank <<std::endl;
|
||||||
|
Slaves[slave].QueueCommand(COMMAND_IRECV,buf,bytes,tag,comm,rank);
|
||||||
|
// std::cout << "Queued recv command from rank "<< rank<< " via "<<slave <<std::endl;
|
||||||
|
Slaves[slave].WakeUpDMA();
|
||||||
|
// std::cout << "Waking up DMA "<< slave<<std::endl;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void WaitAll() {
|
||||||
|
for(int s=1;s<VerticalSize;s++) {
|
||||||
|
// std::cout << "Waiting for slave "<< s<<std::endl;
|
||||||
|
Slaves[s].WaitAll();
|
||||||
|
}
|
||||||
|
// std::cout << " Wait all Complete "<<std::endl;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void GetWork(int nwork, int me, int & mywork, int & myoff,int units){
|
||||||
|
int basework = nwork/units;
|
||||||
|
int backfill = units-(nwork%units);
|
||||||
|
if ( me >= units ) {
|
||||||
|
mywork = myoff = 0;
|
||||||
|
} else {
|
||||||
|
mywork = (nwork+me)/units;
|
||||||
|
myoff = basework * me;
|
||||||
|
if ( me > backfill )
|
||||||
|
myoff+= (me-backfill);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void QueueMultiplexedSend(void *buf, int bytes, int tag, MPI_Comm comm,int rank) {
|
||||||
|
uint8_t * cbuf = (uint8_t *) buf;
|
||||||
|
int mywork, myoff, procs;
|
||||||
|
procs = VerticalSize-1;
|
||||||
|
for(int s=0;s<procs;s++) {
|
||||||
|
GetWork(bytes,s,mywork,myoff,procs);
|
||||||
|
QueueSend(s+1,&cbuf[myoff],mywork,tag,comm,rank);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static void QueueMultiplexedRecv(void *buf, int bytes, int tag, MPI_Comm comm,int rank) {
|
||||||
|
uint8_t * cbuf = (uint8_t *) buf;
|
||||||
|
int mywork, myoff, procs;
|
||||||
|
procs = VerticalSize-1;
|
||||||
|
for(int s=0;s<procs;s++) {
|
||||||
|
GetWork(bytes,s,mywork,myoff,procs);
|
||||||
|
QueueRecv(s+1,&cbuf[myoff],mywork,tag,comm,rank);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Info that is set up once and is independent of the Cartesian layout
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
std::vector<Slave> MPIoffloadEngine::Slaves;
|
||||||
|
|
||||||
|
int MPIoffloadEngine::UniverseRank;
|
||||||
|
int MPIoffloadEngine::UniverseSize;
|
||||||
|
|
||||||
|
MPI_Comm MPIoffloadEngine::communicator_universe;
|
||||||
|
MPI_Comm MPIoffloadEngine::communicator_cached;
|
||||||
|
MPI_Group MPIoffloadEngine::WorldGroup;
|
||||||
|
MPI_Group MPIoffloadEngine::CachedGroup;
|
||||||
|
|
||||||
|
MPI_Comm MPIoffloadEngine::HorizontalComm;
|
||||||
|
int MPIoffloadEngine::HorizontalRank;
|
||||||
|
int MPIoffloadEngine::HorizontalSize;
|
||||||
|
|
||||||
|
MPI_Comm MPIoffloadEngine::VerticalComm;
|
||||||
|
int MPIoffloadEngine::VerticalSize;
|
||||||
|
int MPIoffloadEngine::VerticalRank;
|
||||||
|
MPI_Win MPIoffloadEngine::VerticalWindow;
|
||||||
|
std::vector<void *> MPIoffloadEngine::VerticalShmBufs;
|
||||||
|
std::vector<std::vector<int> > MPIoffloadEngine::UniverseRanks;
|
||||||
|
std::vector<int> MPIoffloadEngine::UserCommunicatorToWorldRanks;
|
||||||
|
|
||||||
|
int MPIoffloadEngine::ShmSetup = 0;
|
||||||
|
|
||||||
|
// One-time setup of the leader/slave layout.
//
//  * duplicates MPI_COMM_WORLD into communicator_universe,
//  * splits it into vertical (shared-memory) and horizontal communicators,
//  * builds the UniverseRanks[world][vertical] table,
//  * maps the shared-memory regions into VerticalShmBufs,
//  * on vertical rank != 0: enters Slave::EventLoop() and never returns,
//  * on vertical rank == 0: initialises the Slaves and returns.
//
// Outputs (leader only):
//   communicator_world : set to HorizontalComm
//   ShmComm            : set to VerticalComm
//   ShmCommBuf         : set to VerticalShmBufs[0]
void MPIoffloadEngine::CommunicatorInit (MPI_Comm &communicator_world,
                                         MPI_Comm &ShmComm,
                                         void * &ShmCommBuf)
{
  assert(ShmSetup==0);

  //////////////////////////////////////////////////////////////////////
  // Universe is all nodes prior to squadron grouping
  //////////////////////////////////////////////////////////////////////
  MPI_Comm_dup (MPI_COMM_WORLD,&communicator_universe);
  MPI_Comm_rank(communicator_universe,&UniverseRank);
  MPI_Comm_size(communicator_universe,&UniverseSize);

  /////////////////////////////////////////////////////////////////////
  // Split into groups that can share memory (Verticals)
  /////////////////////////////////////////////////////////////////////
#undef MPI_SHARED_MEM_DEBUG
#ifdef MPI_SHARED_MEM_DEBUG
  MPI_Comm_split(communicator_universe,(UniverseRank/4),UniverseRank,&VerticalComm);
#else
  MPI_Comm_split_type(communicator_universe, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&VerticalComm);
#endif
  MPI_Comm_rank(VerticalComm ,&VerticalRank);
  MPI_Comm_size(VerticalComm ,&VerticalSize);

  //////////////////////////////////////////////////////////////////////
  // Split into horizontal groups by rank in squadron
  //////////////////////////////////////////////////////////////////////
  MPI_Comm_split(communicator_universe,VerticalRank,UniverseRank,&HorizontalComm);
  MPI_Comm_rank(HorizontalComm,&HorizontalRank);
  MPI_Comm_size(HorizontalComm,&HorizontalSize);
  assert(HorizontalSize*VerticalSize==UniverseSize);

  ////////////////////////////////////////////////////////////////////////////////
  // What is my place in the world
  // Only vertical rank 0 contributes, so the sum is the leader's horizontal rank.
  ////////////////////////////////////////////////////////////////////////////////
  int WorldRank=0;
  if(VerticalRank==0) WorldRank = HorizontalRank;
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&WorldRank,1,MPI_INT,MPI_SUM,VerticalComm);
  assert(ierr==0);

  ////////////////////////////////////////////////////////////////////////////////
  // Where is the world in the universe?
  // Every rank fills in its own entry; summing over the universe completes the table.
  ////////////////////////////////////////////////////////////////////////////////
  UniverseRanks = std::vector<std::vector<int> >(HorizontalSize,std::vector<int>(VerticalSize,0));
  UniverseRanks[WorldRank][VerticalRank] = UniverseRank;
  for(int w=0;w<HorizontalSize;w++){
    ierr=MPI_Allreduce(MPI_IN_PLACE,&UniverseRanks[w][0],VerticalSize,MPI_INT,MPI_SUM,communicator_universe);
    assert(ierr==0);
  }

  //////////////////////////////////////////////////////////////////////////////////////////////////////////
  // allocate the shared window for our group, pass back Shm info to CartesianCommunicator
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
  VerticalShmBufs.resize(VerticalSize);

#undef MPI_SHARED_MEM
#ifdef MPI_SHARED_MEM
  ierr = MPI_Win_allocate_shared(CartesianCommunicator::MAX_MPI_SHM_BYTES,1,MPI_INFO_NULL,VerticalComm,&ShmCommBuf,&VerticalWindow);
  ierr|= MPI_Win_lock_all (MPI_MODE_NOCHECK, VerticalWindow);
  assert(ierr==0);

  for(int r=0;r<VerticalSize;r++){
    MPI_Aint sz;
    int dsp_unit;
    MPI_Win_shared_query (VerticalWindow, r, &sz, &dsp_unit, &VerticalShmBufs[r]);
  }
#else
  char shm_name [NAME_MAX];
  MPI_Barrier(VerticalComm);

  // Leader: (re)create and size every shared-memory object for this node.
  // Region 0 is the data buffer; regions r>0 each hold one SlaveState.
  if ( VerticalRank == 0 ) {
    for(int r=0;r<VerticalSize;r++){

      size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES;
      if ( r>0 ) size = sizeof(SlaveState);

      snprintf(shm_name,sizeof(shm_name),"/Grid_mpi3_shm_%d_%d",WorldRank,r);

      shm_unlink(shm_name); // discard any stale object of the same name

      int fd=shm_open(shm_name,O_RDWR|O_CREAT,0600);
      if ( fd < 0 ) {
        perror("failed shm_open");
        assert(0);
      }

      // An unsized object would fault on first access, so do not ignore failure.
      if ( ftruncate(fd, size) != 0 ) {
        perror("failed ftruncate");
        assert(0);
      }

      VerticalShmBufs[r] = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
      if ( VerticalShmBufs[r] == MAP_FAILED ) {
        perror("failed mmap");
        assert(0);
      }
      close(fd); // the mapping stays valid after the descriptor is closed

      // Stamp the region so the other ranks can verify they mapped the right object.
      uint64_t * check = (uint64_t *) VerticalShmBufs[r];
      check[0] = WorldRank;
      check[1] = r;
    }
  }

  MPI_Barrier(VerticalComm);

  // Non-leaders: attach to the objects the leader created above.
  if ( VerticalRank != 0 ) {
    for(int r=0;r<VerticalSize;r++){

      size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES ;
      if ( r>0 ) size = sizeof(SlaveState);

      snprintf(shm_name,sizeof(shm_name),"/Grid_mpi3_shm_%d_%d",WorldRank,r);

      // No O_CREAT here: the object must already exist.  Creating it would
      // yield a zero-length object and turn a setup error into a later fault.
      int fd=shm_open(shm_name,O_RDWR,0600);
      if ( fd<0 ) {
        perror("failed shm_open");
        assert(0);
      }

      VerticalShmBufs[r] = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
      if ( VerticalShmBufs[r] == MAP_FAILED ) {
        perror("failed mmap");
        assert(0);
      }
      close(fd);

      uint64_t * check = (uint64_t *) VerticalShmBufs[r];
      assert(check[0]== WorldRank);
      assert(check[1]== r);
      std::cerr<<"SHM "<<r<<" " <<VerticalShmBufs[r]<<std::endl;
    }
  }
#endif
  MPI_Barrier(VerticalComm);

  //////////////////////////////////////////////////////////////////////
  // Hand the horizontal communicator back as the caller's "world", and
  // the vertical communicator / region 0 as its shared-memory info.
  //////////////////////////////////////////////////////////////////////
  communicator_world = HorizontalComm;
  ShmComm            = VerticalComm;
  ShmCommBuf         = VerticalShmBufs[0];
  MPI_Comm_group (communicator_world, &WorldGroup);

  ///////////////////////////////////////////////////////////
  // Start the slave data movers
  // The two barriers bracket the leader's SemInit(): the slave creates its
  // semaphores before the first barrier, the leader opens them after it.
  ///////////////////////////////////////////////////////////
  if ( VerticalRank != 0 ) {
    Slave indentured;
    indentured.Init( (SlaveState *) VerticalShmBufs[VerticalRank], VerticalComm, UniverseRank,VerticalRank);
    indentured.SemInitExcl();// init semaphore in shared memory
    MPI_Barrier(VerticalComm);
    MPI_Barrier(VerticalComm);
    indentured.EventLoop();
    assert(0); // EventLoop never returns
  } else {
    Slaves.resize(VerticalSize);
    for(int i=1;i<VerticalSize;i++){
      Slaves[i].Init((SlaveState *)VerticalShmBufs[i],VerticalComm, UniverseRanks[HorizontalRank][i],i);
    }
    MPI_Barrier(VerticalComm);
    for(int i=1;i<VerticalSize;i++){
      Slaves[i].SemInit();// init semaphore in shared memory
    }
    MPI_Barrier(VerticalComm);
  }

  ///////////////////////////////////////////////////////////
  // Verbose for now
  ///////////////////////////////////////////////////////////

  ShmSetup=1;

  if (UniverseRank == 0){

    std::cout<<GridLogMessage << "Grid MPI-3 configuration: detected ";
    std::cout<<UniverseSize << " Ranks " ;
    std::cout<<HorizontalSize << " Nodes " ;
    std::cout<<VerticalSize << " with ranks-per-node "<<std::endl;

    std::cout<<GridLogMessage << "Grid MPI-3 configuration: using one lead process per node " << std::endl;
    std::cout<<GridLogMessage << "Grid MPI-3 configuration: reduced communicator has size " << HorizontalSize << std::endl;

    for(int g=0;g<HorizontalSize;g++){
      std::cout<<GridLogMessage<<" Node "<<g<<" led by MPI rank "<< UniverseRanks[g][0]<<std::endl;
    }

    for(int g=0;g<HorizontalSize;g++){
      std::cout<<GridLogMessage<<" { ";
      for(int s=0;s<VerticalSize;s++){
        std::cout<< UniverseRanks[g][s];
        if ( s<VerticalSize-1 ) {
          std::cout<<",";
        }
      }
      std::cout<<" } "<<std::endl;
    }
  }
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Map the communicator into communicator_world, and find the neighbour.
|
||||||
|
// Cache the mappings; cache size is 1.
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void MPIoffloadEngine::MapCommRankToWorldRank(int &hashtag, int & comm_world_peer,int tag, MPI_Comm comm,int rank) {
|
||||||
|
|
||||||
|
if ( comm == HorizontalComm ) {
|
||||||
|
comm_world_peer = rank;
|
||||||
|
// std::cout << " MapCommRankToWorldRank horiz " <<rank<<"->"<<comm_world_peer<<std::endl;
|
||||||
|
} else if ( comm == communicator_cached ) {
|
||||||
|
comm_world_peer = UserCommunicatorToWorldRanks[rank];
|
||||||
|
// std::cout << " MapCommRankToWorldRank cached " <<rank<<"->"<<comm_world_peer<<std::endl;
|
||||||
|
} else {
|
||||||
|
|
||||||
|
int size;
|
||||||
|
|
||||||
|
MPI_Comm_size(comm,&size);
|
||||||
|
|
||||||
|
UserCommunicatorToWorldRanks.resize(size);
|
||||||
|
|
||||||
|
std::vector<int> cached_ranks(size);
|
||||||
|
|
||||||
|
for(int r=0;r<size;r++) {
|
||||||
|
cached_ranks[r]=r;
|
||||||
|
}
|
||||||
|
|
||||||
|
communicator_cached=comm;
|
||||||
|
|
||||||
|
MPI_Comm_group(communicator_cached, &CachedGroup);
|
||||||
|
|
||||||
|
MPI_Group_translate_ranks(CachedGroup,size,&cached_ranks[0],WorldGroup, &UserCommunicatorToWorldRanks[0]);
|
||||||
|
|
||||||
|
comm_world_peer = UserCommunicatorToWorldRanks[rank];
|
||||||
|
// std::cout << " MapCommRankToWorldRank cache miss " <<rank<<"->"<<comm_world_peer<<std::endl;
|
||||||
|
|
||||||
|
assert(comm_world_peer != MPI_UNDEFINED);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert( (tag & (~0xFFFFL)) ==0);
|
||||||
|
|
||||||
|
uint64_t icomm = (uint64_t)comm;
|
||||||
|
int comm_hash = ((icomm>>0 )&0xFFFF)^((icomm>>16)&0xFFFF)
|
||||||
|
^ ((icomm>>32)&0xFFFF)^((icomm>>48)&0xFFFF);
|
||||||
|
|
||||||
|
// hashtag = (comm_hash<<15) | tag;
|
||||||
|
hashtag = tag;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
// Bind this Slave to its shared FIFO and record its identity.
//   _state         : ring-buffer state to operate on
//   _squadron      : communicator stored in `squadron`
//   _universe_rank : rank used when building the semaphore names
//   _vertical_rank : column used when looking up peers in UniverseRanks
// The FIFO indices are reset to zero.  `base` is taken from
// MPIoffloadEngine::VerticalShmBufs[0], so that table must already be filled.
void Slave::Init(SlaveState * _state,MPI_Comm _squadron,int _universe_rank,int _vertical_rank)
{
  squadron      = _squadron;
  universe_rank = _universe_rank;
  vertical_rank = _vertical_rank;
  state         = _state;

  // Empty queue: all three indices coincide.
  state->head = state->tail = state->start = 0;

  base = (uint64_t)MPIoffloadEngine::VerticalShmBufs[0];
}
|
||||||
|
// Successor of index A in the circular descriptor pool.  The argument is
// parenthesised so that an expression argument is incremented as a whole.
#define PERI_PLUS(A) ( ((A)+1)%pool )
|
||||||
|
int Slave::Event (void) {
|
||||||
|
|
||||||
|
static int tail_last;
|
||||||
|
static int head_last;
|
||||||
|
static int start_last;
|
||||||
|
int ierr;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////
|
||||||
|
// Try to advance the start pointers
|
||||||
|
////////////////////////////////////////////////////
|
||||||
|
int s=state->start;
|
||||||
|
if ( s != state->head ) {
|
||||||
|
switch ( state->Descrs[s].command ) {
|
||||||
|
case COMMAND_ISEND:
|
||||||
|
/*
|
||||||
|
std::cout<< " Send "<<s << " ptr "<< state<<" "<< state->Descrs[s].buf<< "["<<state->Descrs[s].bytes<<"]"
|
||||||
|
<< " to " << state->Descrs[s].rank<< " tag" << state->Descrs[s].tag
|
||||||
|
<< " Comm " << MPIoffloadEngine::communicator_universe<< " me " <<universe_rank<< std::endl;
|
||||||
|
*/
|
||||||
|
ierr = MPI_Isend((void *)(state->Descrs[s].buf+base),
|
||||||
|
state->Descrs[s].bytes,
|
||||||
|
MPI_CHAR,
|
||||||
|
state->Descrs[s].rank,
|
||||||
|
state->Descrs[s].tag,
|
||||||
|
MPIoffloadEngine::communicator_universe,
|
||||||
|
(MPI_Request *)&state->Descrs[s].request);
|
||||||
|
assert(ierr==0);
|
||||||
|
state->start = PERI_PLUS(s);
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case COMMAND_IRECV:
|
||||||
|
/*
|
||||||
|
std::cout<< " Recv "<<s << " ptr "<< state<<" "<< state->Descrs[s].buf<< "["<<state->Descrs[s].bytes<<"]"
|
||||||
|
<< " from " << state->Descrs[s].rank<< " tag" << state->Descrs[s].tag
|
||||||
|
<< " Comm " << MPIoffloadEngine::communicator_universe<< " me "<< universe_rank<< std::endl;
|
||||||
|
*/
|
||||||
|
ierr=MPI_Irecv((void *)(state->Descrs[s].buf+base),
|
||||||
|
state->Descrs[s].bytes,
|
||||||
|
MPI_CHAR,
|
||||||
|
state->Descrs[s].rank,
|
||||||
|
state->Descrs[s].tag,
|
||||||
|
MPIoffloadEngine::communicator_universe,
|
||||||
|
(MPI_Request *)&state->Descrs[s].request);
|
||||||
|
|
||||||
|
// std::cout<< " Request is "<<state->Descrs[s].request<<std::endl;
|
||||||
|
// std::cout<< " Request0 is "<<state->Descrs[0].request<<std::endl;
|
||||||
|
assert(ierr==0);
|
||||||
|
state->start = PERI_PLUS(s);
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case COMMAND_WAITALL:
|
||||||
|
|
||||||
|
for(int t=state->tail;t!=s; t=PERI_PLUS(t) ){
|
||||||
|
MPI_Wait((MPI_Request *)&state->Descrs[t].request,MPI_STATUS_IGNORE);
|
||||||
|
};
|
||||||
|
s=PERI_PLUS(s);
|
||||||
|
state->start = s;
|
||||||
|
state->tail = s;
|
||||||
|
|
||||||
|
WakeUpCompute();
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
// External interaction with the queue
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Compute side: append one command to the FIFO.
//   command  : COMMAND_ISEND, COMMAND_IRECV or COMMAND_WAITALL
//   buf      : buffer address; stored relative to `base`
//   bytes    : message length
//   tag      : MPI tag (passed through MapCommRankToWorldRank)
//   comm     : communicator that `commrank` refers to
//   commrank : peer rank in `comm`
// Always returns 0.  Spins while the FIFO is full.  The caller is expected
// to wake the slave afterwards (WakeUpDMA).
uint64_t Slave::QueueCommand(int command,void *buf, int bytes, int tag, MPI_Comm comm,int commrank)
{
  int head = state->head;
  int next = PERI_PLUS(head);

  // Set up descriptor.  Slot `head` is always free: the queue counts as full
  // one slot early (when next == tail).
  if ( command == COMMAND_WAITALL ) {

    // Slave::Event reads none of the addressing fields for a WAITALL, so
    // skip the rank translation: it would only evict the one-entry
    // communicator cache held by MapCommRankToWorldRank.
    state->Descrs[head].buf   = 0;
    state->Descrs[head].bytes = 0;
    state->Descrs[head].rank  = 0;
    state->Descrs[head].tag   = 0;

  } else {

    int worldrank;
    int hashtag;
    MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,tag,comm,commrank);

    uint64_t relative= (uint64_t)buf - base;
    state->Descrs[head].buf   = relative;
    state->Descrs[head].bytes = bytes;
    // The peer is the rank in the same vertical position on the remote node.
    state->Descrs[head].rank  = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank];
    state->Descrs[head].tag   = hashtag;
  }
  state->Descrs[head].command= command;

  // Block until FIFO has space
  while( state->tail==next );

  // Publish the descriptor by advancing the head index.
  // NOTE(review): no explicit memory barrier here; presumably the sem_post in
  // WakeUpDMA that callers issue next provides the ordering -- confirm on
  // weakly ordered architectures.
  state->head = next;

  return 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Info that is set up once and is independent of the Cartesian layout
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
MPI_Comm CartesianCommunicator::communicator_world;
|
||||||
|
|
||||||
|
void CartesianCommunicator::Init(int *argc, char ***argv)
|
||||||
|
{
|
||||||
|
int flag;
|
||||||
|
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
||||||
|
if ( !flag ) {
|
||||||
|
MPI_Init(argc,argv);
|
||||||
|
}
|
||||||
|
communicator_world = MPI_COMM_WORLD;
|
||||||
|
MPI_Comm ShmComm;
|
||||||
|
MPIoffloadEngine::CommunicatorInit (communicator_world,ShmComm,ShmCommBuf);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
||||||
|
{
|
||||||
|
int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
// Rank in the Cartesian communicator of the process at coordinates `coor`.
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
{
  int found;
  const int status = MPI_Cart_rank (communicator, &coor[0], &found);
  assert(status==0);
  return found;
}
|
||||||
|
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
|
||||||
|
{
|
||||||
|
coor.resize(_ndimension);
|
||||||
|
int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build a fully periodic Cartesian communicator with the given processor
// grid on top of communicator_world.  The grid volume must equal the size
// of communicator_world.
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
  _ndimension = processors.size();
  _processors = processors;

  // Total number of processes in the grid.
  _Nprocessors=1;
  for(int d=0;d<_ndimension;d++){
    _Nprocessors*=_processors[d];
  }

  int world_size;
  MPI_Comm_size(communicator_world,&world_size);
  assert(world_size==_Nprocessors);

  // Every direction wraps around; reordering of ranks is permitted (reorder=1).
  std::vector<int> wrap(_ndimension,1);
  _processor_coor.resize(_ndimension);
  MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&wrap[0],1,&communicator);
  MPI_Comm_rank  (communicator,&_processor);
  MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
}
|
||||||
|
|
||||||
|
// In-place sum of a 32-bit unsigned value over the Cartesian communicator.
void CartesianCommunicator::GlobalSum(uint32_t &u)
{
  const int status = MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
  assert(status==0);
}
|
||||||
|
// In-place sum of a 64-bit unsigned value over the Cartesian communicator.
void CartesianCommunicator::GlobalSum(uint64_t &u)
{
  const int status = MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
  assert(status==0);
}
|
||||||
|
void CartesianCommunicator::GlobalSum(float &f){
|
||||||
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSumVector(float *f,int N)
|
||||||
|
{
|
||||||
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSum(double &d)
|
||||||
|
{
|
||||||
|
int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSumVector(double *d,int N)
|
||||||
|
{
|
||||||
|
int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Basic Halo comms primitive
|
||||||
|
void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
||||||
|
int dest,
|
||||||
|
void *recv,
|
||||||
|
int from,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
std::vector<CommsRequest_t> reqs(0);
|
||||||
|
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
|
||||||
|
SendToRecvFromComplete(reqs);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::SendRecvPacket(void *xmit,
|
||||||
|
void *recv,
|
||||||
|
int sender,
|
||||||
|
int receiver,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
MPI_Status stat;
|
||||||
|
assert(sender != receiver);
|
||||||
|
int tag = sender;
|
||||||
|
if ( _processor == sender ) {
|
||||||
|
MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
|
||||||
|
}
|
||||||
|
if ( _processor == receiver ) {
|
||||||
|
MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Basic Halo comms primitive
|
||||||
|
// Post a non-blocking send to `dest` and a non-blocking receive from `from`
// and append both requests (send first) to `list`.  The send is tagged with
// this process's rank and the receive with the sender's rank.
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                void *xmit,
                                                int dest,
                                                void *recv,
                                                int from,
                                                int bytes)
{
  MPI_Request send_req;
  MPI_Request recv_req;

  int ierr = MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&send_req);
  ierr    |= MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&recv_req);
  assert(ierr==0);

  list.push_back(send_req);
  list.push_back(recv_req);
}
|
||||||
|
|
||||||
|
// Start a halo exchange through the slave data movers.  Both buffers must
// lie inside the shared-memory region at ShmCommBuf, and neither peer may be
// this process.  `list` is not used here; completion is signalled through
// StencilSendToRecvFromComplete.
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                       void *xmit,
                                                       int dest,
                                                       void *recv,
                                                       int from,
                                                       int bytes)
{
  const uint64_t shm_begin  = (uint64_t) ShmCommBuf;
  const uint64_t xmit_begin = (uint64_t) xmit;
  const uint64_t recv_begin = (uint64_t) recv;

  // assert xmit and recv lie in shared memory region
  assert( (xmit_begin >= shm_begin) && (xmit_begin+bytes <= shm_begin+MAX_MPI_SHM_BYTES) );
  assert( (recv_begin >= shm_begin) && (recv_begin+bytes <= shm_begin+MAX_MPI_SHM_BYTES) );

  assert(from!=_processor);
  assert(dest!=_processor);

  // Same tag convention as SendToRecvFromBegin: the sender's rank.
  MPIoffloadEngine::QueueMultiplexedSend(xmit,bytes,_processor,communicator,dest);
  MPIoffloadEngine::QueueMultiplexedRecv(recv,bytes,from,communicator,from);
}
|
||||||
|
|
||||||
|
|
||||||
|
// Finish a stencil exchange by waiting on every slave.  `list` is ignored.
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
  MPIoffloadEngine::WaitAll();
}
|
||||||
|
|
||||||
|
// Intentionally a no-op in this communicator implementation.
void CartesianCommunicator::StencilBarrier(void)
{
}
|
||||||
|
|
||||||
|
// Wait for every request in `list` to complete.
// An empty list returns immediately: taking &list[0] / &status[0] of an
// empty vector is undefined behaviour, and there is nothing to wait for.
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
  if ( list.empty() ) {
    return;
  }
  int nreq=list.size();
  std::vector<MPI_Status> status(nreq);
  int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
  assert(ierr==0);
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::Barrier(void)
|
||||||
|
{
|
||||||
|
int ierr = MPI_Barrier(communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
||||||
|
{
|
||||||
|
int ierr=MPI_Bcast(data,
|
||||||
|
bytes,
|
||||||
|
MPI_BYTE,
|
||||||
|
root,
|
||||||
|
communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||||
|
{
|
||||||
|
int ierr= MPI_Bcast(data,
|
||||||
|
bytes,
|
||||||
|
MPI_BYTE,
|
||||||
|
root,
|
||||||
|
communicator_world);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// This process's shared-memory buffer.
void *CartesianCommunicator::ShmBufferSelf(void)
{
  return ShmCommBuf;
}
|
||||||
|
|
||||||
|
void *CartesianCommunicator::ShmBuffer(int rank) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
@ -1,6 +1,42 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/communicator/Communicator_none.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#include "Grid.h"
|
#include "Grid.h"
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Info that is setup once and indept of cartesian layout
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
void CartesianCommunicator::Init(int *argc, char *** arv)
|
||||||
|
{
|
||||||
|
ShmInitGeneric();
|
||||||
|
}
|
||||||
|
|
||||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||||
{
|
{
|
||||||
_processors = processors;
|
_processors = processors;
|
||||||
@ -20,17 +56,14 @@ void CartesianCommunicator::GlobalSum(float &){}
|
|||||||
void CartesianCommunicator::GlobalSumVector(float *,int N){}
|
void CartesianCommunicator::GlobalSumVector(float *,int N){}
|
||||||
void CartesianCommunicator::GlobalSum(double &){}
|
void CartesianCommunicator::GlobalSum(double &){}
|
||||||
void CartesianCommunicator::GlobalSum(uint32_t &){}
|
void CartesianCommunicator::GlobalSum(uint32_t &){}
|
||||||
|
void CartesianCommunicator::GlobalSum(uint64_t &){}
|
||||||
void CartesianCommunicator::GlobalSumVector(double *,int N){}
|
void CartesianCommunicator::GlobalSumVector(double *,int N){}
|
||||||
|
|
||||||
void CartesianCommunicator::RecvFrom(void *recv,
|
void CartesianCommunicator::SendRecvPacket(void *xmit,
|
||||||
int recv_from_rank,
|
void *recv,
|
||||||
int bytes)
|
int xmit_to_rank,
|
||||||
{
|
int recv_from_rank,
|
||||||
assert(0);
|
int bytes)
|
||||||
}
|
|
||||||
void CartesianCommunicator::SendTo(void *xmit,
|
|
||||||
int xmit_to_rank,
|
|
||||||
int bytes)
|
|
||||||
{
|
{
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
@ -59,30 +92,17 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &
|
|||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CartesianCommunicator::Barrier(void)
|
int CartesianCommunicator::RankWorld(void){return 0;}
|
||||||
{
|
void CartesianCommunicator::Barrier(void){}
|
||||||
}
|
void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {}
|
||||||
|
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { }
|
||||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor) { return 0;}
|
||||||
{
|
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor){ coor = _processor_coor ;}
|
||||||
}
|
|
||||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
||||||
{
|
{
|
||||||
source =0;
|
source =0;
|
||||||
dest=0;
|
dest=0;
|
||||||
}
|
}
|
||||||
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
337
lib/communicator/Communicator_shmem.cc
Normal file
337
lib/communicator/Communicator_shmem.cc
Normal file
@ -0,0 +1,337 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/communicator/Communicator_shmem.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include "Grid.h"
|
||||||
|
#include <mpp/shmem.h>
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
// Should error check all MPI calls.
|
||||||
|
#define SHMEM_VET(addr)
|
||||||
|
|
||||||
|
#define SHMEM_VET_DEBUG(addr) { \
|
||||||
|
if ( ! shmem_addr_accessible(addr,_processor) ) {\
|
||||||
|
std::fprintf(stderr,"%d Inaccessible shmem address %lx %s %s\n",_processor,addr,__FUNCTION__,#addr); \
|
||||||
|
BACKTRACEFILE(); \
|
||||||
|
}\
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Info that is setup once and indept of cartesian layout
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
typedef struct HandShake_t {
|
||||||
|
uint64_t seq_local;
|
||||||
|
uint64_t seq_remote;
|
||||||
|
} HandShake;
|
||||||
|
|
||||||
|
std::array<long,_SHMEM_REDUCE_SYNC_SIZE> make_psync_init(void) {
|
||||||
|
array<long,_SHMEM_REDUCE_SYNC_SIZE> ret;
|
||||||
|
ret.fill(SHMEM_SYNC_VALUE);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync_init = make_psync_init();
|
||||||
|
|
||||||
|
static Vector< HandShake > XConnections;
|
||||||
|
static Vector< HandShake > RConnections;
|
||||||
|
|
||||||
|
void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||||
|
shmem_init();
|
||||||
|
XConnections.resize(shmem_n_pes());
|
||||||
|
RConnections.resize(shmem_n_pes());
|
||||||
|
for(int pe =0 ; pe<shmem_n_pes();pe++){
|
||||||
|
XConnections[pe].seq_local = 0;
|
||||||
|
XConnections[pe].seq_remote= 0;
|
||||||
|
RConnections[pe].seq_local = 0;
|
||||||
|
RConnections[pe].seq_remote= 0;
|
||||||
|
}
|
||||||
|
shmem_barrier_all();
|
||||||
|
ShmInitGeneric();
|
||||||
|
}
|
||||||
|
|
||||||
|
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||||
|
{
|
||||||
|
_ndimension = processors.size();
|
||||||
|
std::vector<int> periodic(_ndimension,1);
|
||||||
|
|
||||||
|
_Nprocessors=1;
|
||||||
|
_processors = processors;
|
||||||
|
_processor_coor.resize(_ndimension);
|
||||||
|
|
||||||
|
_processor = shmem_my_pe();
|
||||||
|
|
||||||
|
Lexicographic::CoorFromIndex(_processor_coor,_processor,_processors);
|
||||||
|
|
||||||
|
for(int i=0;i<_ndimension;i++){
|
||||||
|
_Nprocessors*=_processors[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
int Size = shmem_n_pes();
|
||||||
|
|
||||||
|
|
||||||
|
assert(Size==_Nprocessors);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
||||||
|
static long long source ;
|
||||||
|
static long long dest ;
|
||||||
|
static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
||||||
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
|
||||||
|
|
||||||
|
// int nreduce=1;
|
||||||
|
// int pestart=0;
|
||||||
|
// int logStride=0;
|
||||||
|
|
||||||
|
source = u;
|
||||||
|
dest = 0;
|
||||||
|
shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
|
||||||
|
shmem_barrier_all(); // necessary?
|
||||||
|
u = dest;
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSum(uint64_t &u){
|
||||||
|
static long long source ;
|
||||||
|
static long long dest ;
|
||||||
|
static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
||||||
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
|
||||||
|
|
||||||
|
// int nreduce=1;
|
||||||
|
// int pestart=0;
|
||||||
|
// int logStride=0;
|
||||||
|
|
||||||
|
source = u;
|
||||||
|
dest = 0;
|
||||||
|
shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
|
||||||
|
shmem_barrier_all(); // necessary?
|
||||||
|
u = dest;
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSum(float &f){
|
||||||
|
static float source ;
|
||||||
|
static float dest ;
|
||||||
|
static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
||||||
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
|
||||||
|
|
||||||
|
source = f;
|
||||||
|
dest =0.0;
|
||||||
|
shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
|
||||||
|
f = dest;
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSumVector(float *f,int N)
|
||||||
|
{
|
||||||
|
static float source ;
|
||||||
|
static float dest = 0 ;
|
||||||
|
static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
||||||
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
|
||||||
|
|
||||||
|
if ( shmem_addr_accessible(f,_processor) ){
|
||||||
|
shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
dest =0.0;
|
||||||
|
source = f[i];
|
||||||
|
shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
|
||||||
|
f[i] = dest;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSum(double &d)
|
||||||
|
{
|
||||||
|
static double source;
|
||||||
|
static double dest ;
|
||||||
|
static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
||||||
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
|
||||||
|
|
||||||
|
source = d;
|
||||||
|
dest = 0;
|
||||||
|
shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
|
||||||
|
d = dest;
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSumVector(double *d,int N)
|
||||||
|
{
|
||||||
|
static double source ;
|
||||||
|
static double dest ;
|
||||||
|
static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
||||||
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
|
||||||
|
|
||||||
|
|
||||||
|
if ( shmem_addr_accessible(d,_processor) ){
|
||||||
|
shmem_double_sum_to_all(d,d,N,0,0,_Nprocessors,llwrk,psync);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
source = d[i];
|
||||||
|
dest =0.0;
|
||||||
|
shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
|
||||||
|
d[i] = dest;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
||||||
|
{
|
||||||
|
std::vector<int> coor = _processor_coor;
|
||||||
|
|
||||||
|
assert(std::abs(shift) <_processors[dim]);
|
||||||
|
|
||||||
|
coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim];
|
||||||
|
Lexicographic::IndexFromCoor(coor,source,_processors);
|
||||||
|
|
||||||
|
coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim];
|
||||||
|
Lexicographic::IndexFromCoor(coor,dest,_processors);
|
||||||
|
|
||||||
|
}
|
||||||
|
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
|
||||||
|
{
|
||||||
|
int rank;
|
||||||
|
Lexicographic::IndexFromCoor(coor,rank,_processors);
|
||||||
|
return rank;
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
|
||||||
|
{
|
||||||
|
Lexicographic::CoorFromIndex(coor,rank,_processors);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Basic Halo comms primitive
|
||||||
|
void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
||||||
|
int dest,
|
||||||
|
void *recv,
|
||||||
|
int from,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
SHMEM_VET(xmit);
|
||||||
|
SHMEM_VET(recv);
|
||||||
|
std::vector<CommsRequest_t> reqs(0);
|
||||||
|
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
|
||||||
|
SendToRecvFromComplete(reqs);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::SendRecvPacket(void *xmit,
|
||||||
|
void *recv,
|
||||||
|
int sender,
|
||||||
|
int receiver,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
static uint64_t seq;
|
||||||
|
|
||||||
|
assert(recv!=xmit);
|
||||||
|
volatile HandShake *RecvSeq = (volatile HandShake *) & RConnections[sender];
|
||||||
|
volatile HandShake *SendSeq = (volatile HandShake *) & XConnections[receiver];
|
||||||
|
|
||||||
|
if ( _processor == sender ) {
|
||||||
|
|
||||||
|
// Check he has posted a receive
|
||||||
|
while(SendSeq->seq_remote == SendSeq->seq_local);
|
||||||
|
|
||||||
|
// Advance our send count
|
||||||
|
seq = ++(SendSeq->seq_local);
|
||||||
|
|
||||||
|
// Send this packet
|
||||||
|
SHMEM_VET(recv);
|
||||||
|
shmem_putmem(recv,xmit,bytes,receiver);
|
||||||
|
shmem_fence();
|
||||||
|
|
||||||
|
//Notify him we're done
|
||||||
|
shmem_putmem((void *)&(RecvSeq->seq_remote),&seq,sizeof(seq),receiver);
|
||||||
|
shmem_fence();
|
||||||
|
}
|
||||||
|
if ( _processor == receiver ) {
|
||||||
|
|
||||||
|
// Post a receive
|
||||||
|
seq = ++(RecvSeq->seq_local);
|
||||||
|
shmem_putmem((void *)&(SendSeq->seq_remote),&seq,sizeof(seq),sender);
|
||||||
|
|
||||||
|
// Now wait until he has advanced our reception counter
|
||||||
|
while(RecvSeq->seq_remote != RecvSeq->seq_local);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Basic Halo comms primitive
|
||||||
|
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||||
|
void *xmit,
|
||||||
|
int dest,
|
||||||
|
void *recv,
|
||||||
|
int from,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
SHMEM_VET(xmit);
|
||||||
|
SHMEM_VET(recv);
|
||||||
|
// shmem_putmem_nb(recv,xmit,bytes,dest,NULL);
|
||||||
|
shmem_putmem(recv,xmit,bytes,dest);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||||
|
{
|
||||||
|
// shmem_quiet(); // I'm done
|
||||||
|
shmem_barrier_all();// He's done too
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::Barrier(void)
|
||||||
|
{
|
||||||
|
shmem_barrier_all();
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
||||||
|
{
|
||||||
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
|
||||||
|
static uint32_t word;
|
||||||
|
uint32_t *array = (uint32_t *) data;
|
||||||
|
assert( (bytes % 4)==0);
|
||||||
|
int words = bytes/4;
|
||||||
|
|
||||||
|
if ( shmem_addr_accessible(data,_processor) ){
|
||||||
|
shmem_broadcast32(data,data,words,root,0,0,shmem_n_pes(),psync);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int w=0;w<words;w++){
|
||||||
|
word = array[w];
|
||||||
|
shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync);
|
||||||
|
if ( shmem_my_pe() != root ) {
|
||||||
|
array[w] = word;
|
||||||
|
}
|
||||||
|
shmem_barrier_all();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||||
|
{
|
||||||
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
|
||||||
|
static uint32_t word;
|
||||||
|
uint32_t *array = (uint32_t *) data;
|
||||||
|
assert( (bytes % 4)==0);
|
||||||
|
int words = bytes/4;
|
||||||
|
|
||||||
|
for(int w=0;w<words;w++){
|
||||||
|
word = array[w];
|
||||||
|
shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync);
|
||||||
|
if ( shmem_my_pe() != root ) {
|
||||||
|
array[w]= word;
|
||||||
|
}
|
||||||
|
shmem_barrier_all();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -1,3 +1,31 @@
|
|||||||
|
|
||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/cshift/Cshift_common.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef _GRID_CSHIFT_COMMON_H_
|
#ifndef _GRID_CSHIFT_COMMON_H_
|
||||||
#define _GRID_CSHIFT_COMMON_H_
|
#define _GRID_CSHIFT_COMMON_H_
|
||||||
|
|
||||||
@ -8,7 +36,7 @@ class SimpleCompressor {
|
|||||||
public:
|
public:
|
||||||
void Point(int) {};
|
void Point(int) {};
|
||||||
|
|
||||||
vobj operator() (const vobj &arg,int dimension,int plane,int osite,GridBase *grid) {
|
vobj operator() (const vobj &arg) {
|
||||||
return arg;
|
return arg;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -17,7 +45,7 @@ public:
|
|||||||
// Gather for when there is no need to SIMD split with compression
|
// Gather for when there is no need to SIMD split with compression
|
||||||
///////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////
|
||||||
template<class vobj,class cobj,class compressor> void
|
template<class vobj,class cobj,class compressor> void
|
||||||
Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress)
|
Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0)
|
||||||
{
|
{
|
||||||
int rd = rhs._grid->_rdimensions[dimension];
|
int rd = rhs._grid->_rdimensions[dimension];
|
||||||
|
|
||||||
@ -30,26 +58,32 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<
|
|||||||
int e1=rhs._grid->_slice_nblock[dimension];
|
int e1=rhs._grid->_slice_nblock[dimension];
|
||||||
int e2=rhs._grid->_slice_block[dimension];
|
int e2=rhs._grid->_slice_block[dimension];
|
||||||
|
|
||||||
|
int stride=rhs._grid->_slice_stride[dimension];
|
||||||
if ( cbmask == 0x3 ) {
|
if ( cbmask == 0x3 ) {
|
||||||
PARALLEL_NESTED_LOOP2
|
PARALLEL_NESTED_LOOP2
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o = n*rhs._grid->_slice_stride[dimension];
|
int o = n*stride;
|
||||||
int bo = n*rhs._grid->_slice_block[dimension];
|
int bo = n*e2;
|
||||||
buffer[bo+b]=compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
|
buffer[off+bo+b]=compress(rhs._odata[so+o+b]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
int bo=0;
|
int bo=0;
|
||||||
|
std::vector<std::pair<int,int> > table;
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o = n*rhs._grid->_slice_stride[dimension];
|
int o = n*stride;
|
||||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
int ocb=1<<rhs._grid->CheckerBoardFromOindexTable(o+b);
|
||||||
if ( ocb &cbmask ) {
|
if ( ocb &cbmask ) {
|
||||||
buffer[bo++]=compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
|
table.push_back(std::pair<int,int> (bo++,o+b));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int i=0;i<table.size();i++){
|
||||||
|
buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -70,16 +104,17 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
|
|||||||
|
|
||||||
int e1=rhs._grid->_slice_nblock[dimension];
|
int e1=rhs._grid->_slice_nblock[dimension];
|
||||||
int e2=rhs._grid->_slice_block[dimension];
|
int e2=rhs._grid->_slice_block[dimension];
|
||||||
|
int n1=rhs._grid->_slice_stride[dimension];
|
||||||
|
int n2=rhs._grid->_slice_block[dimension];
|
||||||
if ( cbmask ==0x3){
|
if ( cbmask ==0x3){
|
||||||
PARALLEL_NESTED_LOOP2
|
PARALLEL_NESTED_LOOP2
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
|
|
||||||
int o=n*rhs._grid->_slice_stride[dimension];
|
int o = n*n1;
|
||||||
int offset = b+n*rhs._grid->_slice_block[dimension];
|
int offset = b+n*n2;
|
||||||
|
cobj temp =compress(rhs._odata[so+o+b]);
|
||||||
|
|
||||||
cobj temp =compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
|
|
||||||
extract<cobj>(temp,pointers,offset);
|
extract<cobj>(temp,pointers,offset);
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -87,6 +122,7 @@ PARALLEL_NESTED_LOOP2
|
|||||||
} else {
|
} else {
|
||||||
|
|
||||||
assert(0); //Fixme think this is buggy
|
assert(0); //Fixme think this is buggy
|
||||||
|
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o=n*rhs._grid->_slice_stride[dimension];
|
int o=n*rhs._grid->_slice_stride[dimension];
|
||||||
@ -94,7 +130,7 @@ PARALLEL_NESTED_LOOP2
|
|||||||
int offset = b+n*rhs._grid->_slice_block[dimension];
|
int offset = b+n*rhs._grid->_slice_block[dimension];
|
||||||
|
|
||||||
if ( ocb & cbmask ) {
|
if ( ocb & cbmask ) {
|
||||||
cobj temp =compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
|
cobj temp =compress(rhs._odata[so+o+b]);
|
||||||
extract<cobj>(temp,pointers,offset);
|
extract<cobj>(temp,pointers,offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -105,7 +141,7 @@ PARALLEL_NESTED_LOOP2
|
|||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
// Gather for when there is no need to SIMD split
|
// Gather for when there is no need to SIMD split
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
template<class vobj> void Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer, int dimension,int plane,int cbmask)
|
template<class vobj> void Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask)
|
||||||
{
|
{
|
||||||
SimpleCompressor<vobj> dontcompress;
|
SimpleCompressor<vobj> dontcompress;
|
||||||
Gather_plane_simple (rhs,buffer,dimension,plane,cbmask,dontcompress);
|
Gather_plane_simple (rhs,buffer,dimension,plane,cbmask,dontcompress);
|
||||||
@ -123,7 +159,7 @@ template<class vobj> void Gather_plane_extract(const Lattice<vobj> &rhs,std::vec
|
|||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
// Scatter for when there is no need to SIMD split
|
// Scatter for when there is no need to SIMD split
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer, int dimension,int plane,int cbmask)
|
template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask)
|
||||||
{
|
{
|
||||||
int rd = rhs._grid->_rdimensions[dimension];
|
int rd = rhs._grid->_rdimensions[dimension];
|
||||||
|
|
||||||
@ -216,13 +252,13 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
|
|||||||
|
|
||||||
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
|
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
|
||||||
int e2=rhs._grid->_slice_block[dimension];
|
int e2=rhs._grid->_slice_block[dimension];
|
||||||
|
int stride = rhs._grid->_slice_stride[dimension];
|
||||||
if(cbmask == 0x3 ){
|
if(cbmask == 0x3 ){
|
||||||
PARALLEL_NESTED_LOOP2
|
PARALLEL_NESTED_LOOP2
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
|
|
||||||
int o =n*rhs._grid->_slice_stride[dimension]+b;
|
int o =n*stride+b;
|
||||||
//lhs._odata[lo+o]=rhs._odata[ro+o];
|
//lhs._odata[lo+o]=rhs._odata[ro+o];
|
||||||
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
|
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
|
||||||
}
|
}
|
||||||
@ -232,7 +268,7 @@ PARALLEL_NESTED_LOOP2
|
|||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
|
|
||||||
int o =n*rhs._grid->_slice_stride[dimension]+b;
|
int o =n*stride+b;
|
||||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
|
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
|
||||||
if ( ocb&cbmask ) {
|
if ( ocb&cbmask ) {
|
||||||
//lhs._odata[lo+o]=rhs._odata[ro+o];
|
//lhs._odata[lo+o]=rhs._odata[ro+o];
|
||||||
@ -258,11 +294,12 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
|
|||||||
|
|
||||||
int e1=rhs._grid->_slice_nblock[dimension];
|
int e1=rhs._grid->_slice_nblock[dimension];
|
||||||
int e2=rhs._grid->_slice_block [dimension];
|
int e2=rhs._grid->_slice_block [dimension];
|
||||||
|
int stride = rhs._grid->_slice_stride[dimension];
|
||||||
PARALLEL_NESTED_LOOP2
|
PARALLEL_NESTED_LOOP2
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
|
|
||||||
int o =n*rhs._grid->_slice_stride[dimension];
|
int o =n*stride;
|
||||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
|
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
|
||||||
if ( ocb&cbmask ) {
|
if ( ocb&cbmask ) {
|
||||||
permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type);
|
permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type);
|
||||||
@ -296,6 +333,7 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
|
|||||||
int rd = grid->_rdimensions[dimension];
|
int rd = grid->_rdimensions[dimension];
|
||||||
int ld = grid->_ldimensions[dimension];
|
int ld = grid->_ldimensions[dimension];
|
||||||
int gd = grid->_gdimensions[dimension];
|
int gd = grid->_gdimensions[dimension];
|
||||||
|
int ly = grid->_simd_layout[dimension];
|
||||||
|
|
||||||
// Map to always positive shift modulo global full dimension.
|
// Map to always positive shift modulo global full dimension.
|
||||||
shift = (shift+fd)%fd;
|
shift = (shift+fd)%fd;
|
||||||
@ -304,6 +342,7 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
|
|||||||
// the permute type
|
// the permute type
|
||||||
int permute_dim =grid->PermuteDim(dimension);
|
int permute_dim =grid->PermuteDim(dimension);
|
||||||
int permute_type=grid->PermuteType(dimension);
|
int permute_type=grid->PermuteType(dimension);
|
||||||
|
int permute_type_dist;
|
||||||
|
|
||||||
for(int x=0;x<rd;x++){
|
for(int x=0;x<rd;x++){
|
||||||
|
|
||||||
@ -315,15 +354,31 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
|
|||||||
int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
|
int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
|
||||||
int sx = (x+sshift)%rd;
|
int sx = (x+sshift)%rd;
|
||||||
|
|
||||||
|
// FIXME : This must change where we have a
|
||||||
|
// Rotate slice.
|
||||||
|
|
||||||
|
// Document how this works ; why didn't I do this when I first wrote it...
|
||||||
|
// wrap is whether sshift > rd.
|
||||||
|
// num is sshift mod rd.
|
||||||
|
//
|
||||||
int permute_slice=0;
|
int permute_slice=0;
|
||||||
if(permute_dim){
|
if(permute_dim){
|
||||||
int wrap = sshift/rd;
|
int wrap = sshift/rd;
|
||||||
int num = sshift%rd;
|
int num = sshift%rd;
|
||||||
|
|
||||||
if ( x< rd-num ) permute_slice=wrap;
|
if ( x< rd-num ) permute_slice=wrap;
|
||||||
else permute_slice = 1-wrap;
|
else permute_slice = (wrap+1)%ly;
|
||||||
|
|
||||||
|
if ( (ly>2) && (permute_slice) ) {
|
||||||
|
assert(permute_type & RotateBit);
|
||||||
|
permute_type_dist = permute_type|permute_slice;
|
||||||
|
} else {
|
||||||
|
permute_type_dist = permute_type;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type);
|
if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
|
||||||
else Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
else Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,3 +1,31 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/cshift/Cshift_mpi.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef _GRID_CSHIFT_MPI_H_
|
#ifndef _GRID_CSHIFT_MPI_H_
|
||||||
#define _GRID_CSHIFT_MPI_H_
|
#define _GRID_CSHIFT_MPI_H_
|
||||||
|
|
||||||
@ -91,8 +119,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
|
|||||||
assert(shift<fd);
|
assert(shift<fd);
|
||||||
|
|
||||||
int buffer_size = rhs._grid->_slice_nblock[dimension]*rhs._grid->_slice_block[dimension];
|
int buffer_size = rhs._grid->_slice_nblock[dimension]*rhs._grid->_slice_block[dimension];
|
||||||
std::vector<vobj,alignedAllocator<vobj> > send_buf(buffer_size);
|
commVector<vobj> send_buf(buffer_size);
|
||||||
std::vector<vobj,alignedAllocator<vobj> > recv_buf(buffer_size);
|
commVector<vobj> recv_buf(buffer_size);
|
||||||
|
|
||||||
int cb= (cbmask==0x2)? Odd : Even;
|
int cb= (cbmask==0x2)? Odd : Even;
|
||||||
int sshift= rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
|
int sshift= rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
|
||||||
@ -163,11 +191,12 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
|
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
|
||||||
int words = sizeof(vobj)/sizeof(vector_type);
|
int words = sizeof(vobj)/sizeof(vector_type);
|
||||||
|
|
||||||
std::vector<std::vector<scalar_object> > send_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
|
std::vector<commVector<scalar_object> > send_buf_extract(Nsimd,commVector<scalar_object>(buffer_size) );
|
||||||
std::vector<std::vector<scalar_object> > recv_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
|
std::vector<commVector<scalar_object> > recv_buf_extract(Nsimd,commVector<scalar_object>(buffer_size) );
|
||||||
|
|
||||||
int bytes = buffer_size*sizeof(scalar_object);
|
int bytes = buffer_size*sizeof(scalar_object);
|
||||||
|
|
||||||
std::vector<scalar_object *> pointers(Nsimd); //
|
std::vector<scalar_object *> pointers(Nsimd); //
|
||||||
std::vector<scalar_object *> rpointers(Nsimd); // received pointers
|
std::vector<scalar_object *> rpointers(Nsimd); // received pointers
|
||||||
|
|
||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/cshift/Cshift_none.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef _GRID_CSHIFT_NONE_H_
|
#ifndef _GRID_CSHIFT_NONE_H_
|
||||||
#define _GRID_CSHIFT_NONE_H_
|
#define _GRID_CSHIFT_NONE_H_
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
@ -1,44 +1,74 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/lattice/Lattice_ET.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: neo <cossu@post.kek.jp>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_LATTICE_ET_H
|
#ifndef GRID_LATTICE_ET_H
|
||||||
#define GRID_LATTICE_ET_H
|
#define GRID_LATTICE_ET_H
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <vector>
|
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
#include <typeinfo>
|
#include <typeinfo>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
// Predicated where support
|
// Predicated where support
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
template<class iobj,class vobj,class robj>
|
template <class iobj, class vobj, class robj>
|
||||||
inline vobj predicatedWhere(const iobj &predicate,const vobj &iftrue,const robj &iffalse) {
|
inline vobj predicatedWhere(const iobj &predicate, const vobj &iftrue,
|
||||||
|
const robj &iffalse) {
|
||||||
|
typename std::remove_const<vobj>::type ret;
|
||||||
|
|
||||||
typename std::remove_const<vobj>::type ret;
|
typedef typename vobj::scalar_object scalar_object;
|
||||||
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
|
typedef typename vobj::vector_type vector_type;
|
||||||
|
|
||||||
typedef typename vobj::scalar_object scalar_object;
|
const int Nsimd = vobj::vector_type::Nsimd();
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
const int words = sizeof(vobj) / sizeof(vector_type);
|
||||||
typedef typename vobj::vector_type vector_type;
|
|
||||||
|
|
||||||
const int Nsimd = vobj::vector_type::Nsimd();
|
std::vector<Integer> mask(Nsimd);
|
||||||
const int words = sizeof(vobj)/sizeof(vector_type);
|
std::vector<scalar_object> truevals(Nsimd);
|
||||||
|
std::vector<scalar_object> falsevals(Nsimd);
|
||||||
|
|
||||||
std::vector<Integer> mask(Nsimd);
|
extract(iftrue, truevals);
|
||||||
std::vector<scalar_object> truevals (Nsimd);
|
extract(iffalse, falsevals);
|
||||||
std::vector<scalar_object> falsevals(Nsimd);
|
extract<vInteger, Integer>(TensorRemove(predicate), mask);
|
||||||
|
|
||||||
extract(iftrue ,truevals);
|
for (int s = 0; s < Nsimd; s++) {
|
||||||
extract(iffalse ,falsevals);
|
if (mask[s]) falsevals[s] = truevals[s];
|
||||||
extract<vInteger,Integer>(TensorRemove(predicate),mask);
|
|
||||||
|
|
||||||
for(int s=0;s<Nsimd;s++){
|
|
||||||
if (mask[s]) falsevals[s]=truevals[s];
|
|
||||||
}
|
|
||||||
|
|
||||||
merge(ret,falsevals);
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
merge(ret, falsevals);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// recursive evaluation of expressions; Could
|
// recursive evaluation of expressions; Could
|
||||||
// switch to generic approach with variadics, a la
|
// switch to generic approach with variadics, a la
|
||||||
@ -46,303 +76,353 @@ namespace Grid {
|
|||||||
// from tuple is hideous; C++14 introduces std::make_index_sequence for this
|
// from tuple is hideous; C++14 introduces std::make_index_sequence for this
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
|
|
||||||
|
// leaf eval of lattice ; should enable if protect using traits
|
||||||
|
|
||||||
//leaf eval of lattice ; should enable if protect using traits
|
template <typename T>
|
||||||
|
using is_lattice = std::is_base_of<LatticeBase, T>;
|
||||||
|
|
||||||
template <typename T> using is_lattice = std::is_base_of<LatticeBase,T >;
|
template <typename T>
|
||||||
|
using is_lattice_expr = std::is_base_of<LatticeExpressionBase, T>;
|
||||||
|
|
||||||
template <typename T> using is_lattice_expr = std::is_base_of<LatticeExpressionBase,T >;
|
template <typename T> using is_lattice_expr = std::is_base_of<LatticeExpressionBase,T >;
|
||||||
|
|
||||||
|
//Specialization of getVectorType for lattices
|
||||||
|
template<typename T>
|
||||||
|
struct getVectorType<Lattice<T> >{
|
||||||
|
typedef typename Lattice<T>::vector_object type;
|
||||||
|
};
|
||||||
|
|
||||||
template<class sobj>
|
template<class sobj>
|
||||||
inline sobj eval(const unsigned int ss, const sobj &arg)
|
inline sobj eval(const unsigned int ss, const sobj &arg)
|
||||||
{
|
{
|
||||||
return arg;
|
return arg;
|
||||||
}
|
}
|
||||||
template<class lobj>
|
template <class lobj>
|
||||||
inline const lobj &eval(const unsigned int ss, const Lattice<lobj> &arg)
|
inline const lobj &eval(const unsigned int ss, const Lattice<lobj> &arg) {
|
||||||
{
|
return arg._odata[ss];
|
||||||
return arg._odata[ss];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// handle nodes in syntax tree
|
// handle nodes in syntax tree
|
||||||
template <typename Op, typename T1>
|
template <typename Op, typename T1>
|
||||||
auto inline eval(const unsigned int ss, const LatticeUnaryExpression<Op,T1 > &expr) // eval one operand
|
auto inline eval(
|
||||||
-> decltype(expr.first.func(eval(ss,std::get<0>(expr.second))))
|
const unsigned int ss,
|
||||||
{
|
const LatticeUnaryExpression<Op, T1> &expr) // eval one operand
|
||||||
return expr.first.func(eval(ss,std::get<0>(expr.second)));
|
-> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)))) {
|
||||||
|
return expr.first.func(eval(ss, std::get<0>(expr.second)));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Op, typename T1, typename T2>
|
template <typename Op, typename T1, typename T2>
|
||||||
auto inline eval(const unsigned int ss, const LatticeBinaryExpression<Op,T1,T2> &expr) // eval two operands
|
auto inline eval(
|
||||||
-> decltype(expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second))))
|
const unsigned int ss,
|
||||||
{
|
const LatticeBinaryExpression<Op, T1, T2> &expr) // eval two operands
|
||||||
return expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)));
|
-> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||||
|
eval(ss, std::get<1>(expr.second)))) {
|
||||||
|
return expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||||
|
eval(ss, std::get<1>(expr.second)));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Op, typename T1, typename T2, typename T3>
|
template <typename Op, typename T1, typename T2, typename T3>
|
||||||
auto inline eval(const unsigned int ss, const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr) // eval three operands
|
auto inline eval(const unsigned int ss,
|
||||||
-> decltype(expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)),eval(ss,std::get<2>(expr.second))))
|
const LatticeTrinaryExpression<Op, T1, T2, T3>
|
||||||
{
|
&expr) // eval three operands
|
||||||
return expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)),eval(ss,std::get<2>(expr.second)) );
|
-> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||||
|
eval(ss, std::get<1>(expr.second)),
|
||||||
|
eval(ss, std::get<2>(expr.second)))) {
|
||||||
|
return expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||||
|
eval(ss, std::get<1>(expr.second)),
|
||||||
|
eval(ss, std::get<2>(expr.second)));
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// Obtain the grid from an expression, ensuring conformable. This must follow a tree recursion
|
// Obtain the grid from an expression, ensuring conformable. This must follow a
|
||||||
|
// tree recursion
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
template<class T1, typename std::enable_if<is_lattice<T1>::value, T1>::type * =nullptr >
|
template <class T1,
|
||||||
inline void GridFromExpression(GridBase * &grid,const T1& lat) // Lattice leaf
|
typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
|
||||||
|
inline void GridFromExpression(GridBase *&grid, const T1 &lat) // Lattice leaf
|
||||||
{
|
{
|
||||||
if ( grid ) {
|
if (grid) {
|
||||||
conformable(grid,lat._grid);
|
conformable(grid, lat._grid);
|
||||||
}
|
}
|
||||||
grid=lat._grid;
|
grid = lat._grid;
|
||||||
}
|
|
||||||
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
|
|
||||||
inline void GridFromExpression(GridBase * &grid,const T1& notlat) // non-lattice leaf
|
|
||||||
{
|
|
||||||
}
|
}
|
||||||
|
template <class T1,
|
||||||
|
typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
|
||||||
|
inline void GridFromExpression(GridBase *&grid,
|
||||||
|
const T1 &notlat) // non-lattice leaf
|
||||||
|
{}
|
||||||
template <typename Op, typename T1>
|
template <typename Op, typename T1>
|
||||||
inline void GridFromExpression(GridBase * &grid,const LatticeUnaryExpression<Op,T1 > &expr)
|
inline void GridFromExpression(GridBase *&grid,
|
||||||
{
|
const LatticeUnaryExpression<Op, T1> &expr) {
|
||||||
GridFromExpression(grid,std::get<0>(expr.second));// recurse
|
GridFromExpression(grid, std::get<0>(expr.second)); // recurse
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Op, typename T1, typename T2>
|
template <typename Op, typename T1, typename T2>
|
||||||
inline void GridFromExpression(GridBase * &grid,const LatticeBinaryExpression<Op,T1,T2> &expr)
|
inline void GridFromExpression(
|
||||||
{
|
GridBase *&grid, const LatticeBinaryExpression<Op, T1, T2> &expr) {
|
||||||
GridFromExpression(grid,std::get<0>(expr.second));// recurse
|
GridFromExpression(grid, std::get<0>(expr.second)); // recurse
|
||||||
GridFromExpression(grid,std::get<1>(expr.second));
|
GridFromExpression(grid, std::get<1>(expr.second));
|
||||||
}
|
}
|
||||||
template <typename Op, typename T1, typename T2, typename T3>
|
template <typename Op, typename T1, typename T2, typename T3>
|
||||||
inline void GridFromExpression( GridBase * &grid,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr)
|
inline void GridFromExpression(
|
||||||
{
|
GridBase *&grid, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) {
|
||||||
GridFromExpression(grid,std::get<0>(expr.second));// recurse
|
GridFromExpression(grid, std::get<0>(expr.second)); // recurse
|
||||||
GridFromExpression(grid,std::get<1>(expr.second));
|
GridFromExpression(grid, std::get<1>(expr.second));
|
||||||
GridFromExpression(grid,std::get<2>(expr.second));
|
GridFromExpression(grid, std::get<2>(expr.second));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// Obtain the CB from an expression, ensuring conformable. This must follow a tree recursion
|
// Obtain the CB from an expression, ensuring conformable. This must follow a
|
||||||
|
// tree recursion
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
template<class T1, typename std::enable_if<is_lattice<T1>::value, T1>::type * =nullptr >
|
template <class T1,
|
||||||
inline void CBFromExpression(int &cb,const T1& lat) // Lattice leaf
|
typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
|
||||||
|
inline void CBFromExpression(int &cb, const T1 &lat) // Lattice leaf
|
||||||
{
|
{
|
||||||
if ( (cb==Odd) || (cb==Even) ) {
|
if ((cb == Odd) || (cb == Even)) {
|
||||||
assert(cb==lat.checkerboard);
|
assert(cb == lat.checkerboard);
|
||||||
}
|
}
|
||||||
cb=lat.checkerboard;
|
cb = lat.checkerboard;
|
||||||
// std::cout<<GridLogMessage<<"Lattice leaf cb "<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Lattice leaf cb "<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
|
template <class T1,
|
||||||
inline void CBFromExpression(int &cb,const T1& notlat) // non-lattice leaf
|
typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
|
||||||
|
inline void CBFromExpression(int &cb, const T1 &notlat) // non-lattice leaf
|
||||||
{
|
{
|
||||||
// std::cout<<GridLogMessage<<"Non lattice leaf cb"<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Non lattice leaf cb"<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
template <typename Op, typename T1>
|
template <typename Op, typename T1>
|
||||||
inline void CBFromExpression(int &cb,const LatticeUnaryExpression<Op,T1 > &expr)
|
inline void CBFromExpression(int &cb,
|
||||||
{
|
const LatticeUnaryExpression<Op, T1> &expr) {
|
||||||
CBFromExpression(cb,std::get<0>(expr.second));// recurse
|
CBFromExpression(cb, std::get<0>(expr.second)); // recurse
|
||||||
// std::cout<<GridLogMessage<<"Unary node cb "<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Unary node cb "<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Op, typename T1, typename T2>
|
template <typename Op, typename T1, typename T2>
|
||||||
inline void CBFromExpression(int &cb,const LatticeBinaryExpression<Op,T1,T2> &expr)
|
inline void CBFromExpression(int &cb,
|
||||||
{
|
const LatticeBinaryExpression<Op, T1, T2> &expr) {
|
||||||
CBFromExpression(cb,std::get<0>(expr.second));// recurse
|
CBFromExpression(cb, std::get<0>(expr.second)); // recurse
|
||||||
CBFromExpression(cb,std::get<1>(expr.second));
|
CBFromExpression(cb, std::get<1>(expr.second));
|
||||||
// std::cout<<GridLogMessage<<"Binary node cb "<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Binary node cb "<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
template <typename Op, typename T1, typename T2, typename T3>
|
template <typename Op, typename T1, typename T2, typename T3>
|
||||||
inline void CBFromExpression( int &cb,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr)
|
inline void CBFromExpression(
|
||||||
{
|
int &cb, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) {
|
||||||
CBFromExpression(cb,std::get<0>(expr.second));// recurse
|
CBFromExpression(cb, std::get<0>(expr.second)); // recurse
|
||||||
CBFromExpression(cb,std::get<1>(expr.second));
|
CBFromExpression(cb, std::get<1>(expr.second));
|
||||||
CBFromExpression(cb,std::get<2>(expr.second));
|
CBFromExpression(cb, std::get<2>(expr.second));
|
||||||
// std::cout<<GridLogMessage<<"Trinary node cb "<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Trinary node cb "<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Unary operators and funcs
|
// Unary operators and funcs
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
#define GridUnopClass(name,ret)\
|
#define GridUnopClass(name, ret) \
|
||||||
template <class arg> struct name\
|
template <class arg> \
|
||||||
{\
|
struct name { \
|
||||||
static auto inline func(const arg a)-> decltype(ret) { return ret; } \
|
static auto inline func(const arg a) -> decltype(ret) { return ret; } \
|
||||||
};
|
};
|
||||||
|
|
||||||
GridUnopClass(UnarySub,-a);
|
GridUnopClass(UnarySub, -a);
|
||||||
GridUnopClass(UnaryNot,Not(a));
|
GridUnopClass(UnaryNot, Not(a));
|
||||||
GridUnopClass(UnaryAdj,adj(a));
|
GridUnopClass(UnaryAdj, adj(a));
|
||||||
GridUnopClass(UnaryConj,conjugate(a));
|
GridUnopClass(UnaryConj, conjugate(a));
|
||||||
GridUnopClass(UnaryTrace,trace(a));
|
GridUnopClass(UnaryTrace, trace(a));
|
||||||
GridUnopClass(UnaryTranspose,transpose(a));
|
GridUnopClass(UnaryTranspose, transpose(a));
|
||||||
GridUnopClass(UnaryTa,Ta(a));
|
GridUnopClass(UnaryTa, Ta(a));
|
||||||
GridUnopClass(UnaryProjectOnGroup,ProjectOnGroup(a));
|
GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a));
|
||||||
GridUnopClass(UnaryReal,real(a));
|
GridUnopClass(UnaryReal, real(a));
|
||||||
GridUnopClass(UnaryImag,imag(a));
|
GridUnopClass(UnaryImag, imag(a));
|
||||||
GridUnopClass(UnaryToReal,toReal(a));
|
GridUnopClass(UnaryToReal, toReal(a));
|
||||||
GridUnopClass(UnaryToComplex,toComplex(a));
|
GridUnopClass(UnaryToComplex, toComplex(a));
|
||||||
GridUnopClass(UnaryAbs,abs(a));
|
GridUnopClass(UnaryTimesI, timesI(a));
|
||||||
GridUnopClass(UnarySqrt,sqrt(a));
|
GridUnopClass(UnaryTimesMinusI, timesMinusI(a));
|
||||||
GridUnopClass(UnaryRsqrt,rsqrt(a));
|
GridUnopClass(UnaryAbs, abs(a));
|
||||||
GridUnopClass(UnarySin,sin(a));
|
GridUnopClass(UnarySqrt, sqrt(a));
|
||||||
GridUnopClass(UnaryCos,cos(a));
|
GridUnopClass(UnaryRsqrt, rsqrt(a));
|
||||||
GridUnopClass(UnaryLog,log(a));
|
GridUnopClass(UnarySin, sin(a));
|
||||||
GridUnopClass(UnaryExp,exp(a));
|
GridUnopClass(UnaryCos, cos(a));
|
||||||
|
GridUnopClass(UnaryAsin, asin(a));
|
||||||
|
GridUnopClass(UnaryAcos, acos(a));
|
||||||
|
GridUnopClass(UnaryLog, log(a));
|
||||||
|
GridUnopClass(UnaryExp, exp(a));
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Binary operators
|
// Binary operators
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
#define GridBinOpClass(name,combination)\
|
#define GridBinOpClass(name, combination) \
|
||||||
template <class left,class right>\
|
template <class left, class right> \
|
||||||
struct name\
|
struct name { \
|
||||||
{\
|
static auto inline func(const left &lhs, const right &rhs) \
|
||||||
static auto inline func(const left &lhs,const right &rhs)-> decltype(combination) const \
|
-> decltype(combination) const { \
|
||||||
{\
|
return combination; \
|
||||||
return combination;\
|
} \
|
||||||
}\
|
}
|
||||||
}
|
GridBinOpClass(BinaryAdd, lhs + rhs);
|
||||||
GridBinOpClass(BinaryAdd,lhs+rhs);
|
GridBinOpClass(BinarySub, lhs - rhs);
|
||||||
GridBinOpClass(BinarySub,lhs-rhs);
|
GridBinOpClass(BinaryMul, lhs *rhs);
|
||||||
GridBinOpClass(BinaryMul,lhs*rhs);
|
GridBinOpClass(BinaryDiv, lhs /rhs);
|
||||||
|
|
||||||
GridBinOpClass(BinaryAnd ,lhs&rhs);
|
GridBinOpClass(BinaryAnd, lhs &rhs);
|
||||||
GridBinOpClass(BinaryOr ,lhs|rhs);
|
GridBinOpClass(BinaryOr, lhs | rhs);
|
||||||
GridBinOpClass(BinaryAndAnd,lhs&&rhs);
|
GridBinOpClass(BinaryAndAnd, lhs &&rhs);
|
||||||
GridBinOpClass(BinaryOrOr ,lhs||rhs);
|
GridBinOpClass(BinaryOrOr, lhs || rhs);
|
||||||
|
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
// Trinary conditional op
|
// Trinary conditional op
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
#define GridTrinOpClass(name,combination)\
|
#define GridTrinOpClass(name, combination) \
|
||||||
template <class predicate,class left, class right> \
|
template <class predicate, class left, class right> \
|
||||||
struct name\
|
struct name { \
|
||||||
{\
|
static auto inline func(const predicate &pred, const left &lhs, \
|
||||||
static auto inline func(const predicate &pred,const left &lhs,const right &rhs)-> decltype(combination) const \
|
const right &rhs) -> decltype(combination) const { \
|
||||||
{\
|
return combination; \
|
||||||
return combination;\
|
} \
|
||||||
}\
|
}
|
||||||
}
|
|
||||||
|
|
||||||
GridTrinOpClass(TrinaryWhere,(predicatedWhere<predicate, \
|
GridTrinOpClass(
|
||||||
typename std::remove_reference<left>::type, \
|
TrinaryWhere,
|
||||||
typename std::remove_reference<right>::type> (pred,lhs,rhs)));
|
(predicatedWhere<predicate, typename std::remove_reference<left>::type,
|
||||||
|
typename std::remove_reference<right>::type>(pred, lhs,
|
||||||
|
rhs)));
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Operator syntactical glue
|
// Operator syntactical glue
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
|
|
||||||
#define GRID_UNOP(name) name<decltype(eval(0, arg))>
|
#define GRID_UNOP(name) name<decltype(eval(0, arg))>
|
||||||
#define GRID_BINOP(name) name<decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
#define GRID_BINOP(name) name<decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
||||||
#define GRID_TRINOP(name) name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
#define GRID_TRINOP(name) \
|
||||||
|
name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
||||||
|
|
||||||
#define GRID_DEF_UNOP(op, name)\
|
#define GRID_DEF_UNOP(op, name) \
|
||||||
template <typename T1,\
|
template <typename T1, \
|
||||||
typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value, T1>::type* = nullptr> inline auto op(const T1 &arg) \
|
typename std::enable_if<is_lattice<T1>::value || \
|
||||||
-> decltype(LatticeUnaryExpression<GRID_UNOP(name),const T1&>(std::make_pair(GRID_UNOP(name)(),std::forward_as_tuple(arg)))) \
|
is_lattice_expr<T1>::value, \
|
||||||
{ return LatticeUnaryExpression<GRID_UNOP(name), const T1 &>(std::make_pair(GRID_UNOP(name)(),std::forward_as_tuple(arg))); }
|
T1>::type * = nullptr> \
|
||||||
|
inline auto op(const T1 &arg) \
|
||||||
|
->decltype(LatticeUnaryExpression<GRID_UNOP(name), const T1 &>( \
|
||||||
|
std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg)))) { \
|
||||||
|
return LatticeUnaryExpression<GRID_UNOP(name), const T1 &>( \
|
||||||
|
std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg))); \
|
||||||
|
}
|
||||||
|
|
||||||
#define GRID_BINOP_LEFT(op, name)\
|
#define GRID_BINOP_LEFT(op, name) \
|
||||||
template <typename T1,typename T2,\
|
template <typename T1, typename T2, \
|
||||||
typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value, T1>::type* = nullptr>\
|
typename std::enable_if<is_lattice<T1>::value || \
|
||||||
inline auto op(const T1 &lhs,const T2&rhs) \
|
is_lattice_expr<T1>::value, \
|
||||||
-> decltype(LatticeBinaryExpression<GRID_BINOP(name),const T1&,const T2 &>(std::make_pair(GRID_BINOP(name)(),\
|
T1>::type * = nullptr> \
|
||||||
std::forward_as_tuple(lhs, rhs)))) \
|
inline auto op(const T1 &lhs, const T2 &rhs) \
|
||||||
{\
|
->decltype( \
|
||||||
return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>(std::make_pair(GRID_BINOP(name)(),\
|
LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||||
std::forward_as_tuple(lhs, rhs))); \
|
std::make_pair(GRID_BINOP(name)(), \
|
||||||
}
|
std::forward_as_tuple(lhs, rhs)))) { \
|
||||||
|
return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||||
|
std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \
|
||||||
|
}
|
||||||
|
|
||||||
#define GRID_BINOP_RIGHT(op, name)\
|
#define GRID_BINOP_RIGHT(op, name) \
|
||||||
template <typename T1,typename T2,\
|
template <typename T1, typename T2, \
|
||||||
typename std::enable_if<!is_lattice<T1>::value && !is_lattice_expr<T1>::value, T1>::type* = nullptr,\
|
typename std::enable_if<!is_lattice<T1>::value && \
|
||||||
typename std::enable_if< is_lattice<T2>::value || is_lattice_expr<T2>::value, T2>::type* = nullptr> \
|
!is_lattice_expr<T1>::value, \
|
||||||
inline auto op(const T1 &lhs,const T2&rhs) \
|
T1>::type * = nullptr, \
|
||||||
-> decltype(LatticeBinaryExpression<GRID_BINOP(name),const T1&,const T2 &>(std::make_pair(GRID_BINOP(name)(),\
|
typename std::enable_if<is_lattice<T2>::value || \
|
||||||
std::forward_as_tuple(lhs, rhs)))) \
|
is_lattice_expr<T2>::value, \
|
||||||
{\
|
T2>::type * = nullptr> \
|
||||||
return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>(std::make_pair(GRID_BINOP(name)(),\
|
inline auto op(const T1 &lhs, const T2 &rhs) \
|
||||||
std::forward_as_tuple(lhs, rhs))); \
|
->decltype( \
|
||||||
}
|
LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||||
|
std::make_pair(GRID_BINOP(name)(), \
|
||||||
|
std::forward_as_tuple(lhs, rhs)))) { \
|
||||||
|
return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||||
|
std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \
|
||||||
|
}
|
||||||
|
|
||||||
#define GRID_DEF_BINOP(op, name)\
|
#define GRID_DEF_BINOP(op, name) \
|
||||||
GRID_BINOP_LEFT(op,name);\
|
GRID_BINOP_LEFT(op, name); \
|
||||||
GRID_BINOP_RIGHT(op,name);
|
GRID_BINOP_RIGHT(op, name);
|
||||||
|
|
||||||
|
#define GRID_DEF_TRINOP(op, name) \
|
||||||
#define GRID_DEF_TRINOP(op, name)\
|
template <typename T1, typename T2, typename T3> \
|
||||||
template <typename T1,typename T2,typename T3> inline auto op(const T1 &pred,const T2&lhs,const T3 &rhs) \
|
inline auto op(const T1 &pred, const T2 &lhs, const T3 &rhs) \
|
||||||
-> decltype(LatticeTrinaryExpression<GRID_TRINOP(name),const T1&,const T2 &,const T3&>(std::make_pair(GRID_TRINOP(name)(),\
|
->decltype( \
|
||||||
std::forward_as_tuple(pred,lhs,rhs)))) \
|
LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &, \
|
||||||
{\
|
const T3 &>(std::make_pair( \
|
||||||
return LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &,const T3&>(std::make_pair(GRID_TRINOP(name)(), \
|
GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs)))) { \
|
||||||
std::forward_as_tuple(pred,lhs, rhs))); \
|
return LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &, \
|
||||||
}
|
const T3 &>(std::make_pair( \
|
||||||
|
GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs))); \
|
||||||
|
}
|
||||||
////////////////////////
|
////////////////////////
|
||||||
//Operator definitions
|
// Operator definitions
|
||||||
////////////////////////
|
////////////////////////
|
||||||
|
|
||||||
GRID_DEF_UNOP(operator -,UnarySub);
|
GRID_DEF_UNOP(operator-, UnarySub);
|
||||||
GRID_DEF_UNOP(Not,UnaryNot);
|
GRID_DEF_UNOP(Not, UnaryNot);
|
||||||
GRID_DEF_UNOP(operator !,UnaryNot);
|
GRID_DEF_UNOP(operator!, UnaryNot);
|
||||||
GRID_DEF_UNOP(adj,UnaryAdj);
|
GRID_DEF_UNOP(adj, UnaryAdj);
|
||||||
GRID_DEF_UNOP(conjugate,UnaryConj);
|
GRID_DEF_UNOP(conjugate, UnaryConj);
|
||||||
GRID_DEF_UNOP(trace,UnaryTrace);
|
GRID_DEF_UNOP(trace, UnaryTrace);
|
||||||
GRID_DEF_UNOP(transpose,UnaryTranspose);
|
GRID_DEF_UNOP(transpose, UnaryTranspose);
|
||||||
GRID_DEF_UNOP(Ta,UnaryTa);
|
GRID_DEF_UNOP(Ta, UnaryTa);
|
||||||
GRID_DEF_UNOP(ProjectOnGroup,UnaryProjectOnGroup);
|
GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup);
|
||||||
GRID_DEF_UNOP(real,UnaryReal);
|
GRID_DEF_UNOP(real, UnaryReal);
|
||||||
GRID_DEF_UNOP(imag,UnaryImag);
|
GRID_DEF_UNOP(imag, UnaryImag);
|
||||||
GRID_DEF_UNOP(toReal,UnaryToReal);
|
GRID_DEF_UNOP(toReal, UnaryToReal);
|
||||||
GRID_DEF_UNOP(toComplex,UnaryToComplex);
|
GRID_DEF_UNOP(toComplex, UnaryToComplex);
|
||||||
GRID_DEF_UNOP(abs ,UnaryAbs); //abs overloaded in cmath C++98; DON'T do the abs-fabs-dabs-labs thing
|
GRID_DEF_UNOP(timesI, UnaryTimesI);
|
||||||
GRID_DEF_UNOP(sqrt ,UnarySqrt);
|
GRID_DEF_UNOP(timesMinusI, UnaryTimesMinusI);
|
||||||
GRID_DEF_UNOP(rsqrt,UnaryRsqrt);
|
GRID_DEF_UNOP(abs, UnaryAbs); // abs overloaded in cmath C++98; DON'T do the
|
||||||
GRID_DEF_UNOP(sin ,UnarySin);
|
// abs-fabs-dabs-labs thing
|
||||||
GRID_DEF_UNOP(cos ,UnaryCos);
|
GRID_DEF_UNOP(sqrt, UnarySqrt);
|
||||||
GRID_DEF_UNOP(log ,UnaryLog);
|
GRID_DEF_UNOP(rsqrt, UnaryRsqrt);
|
||||||
GRID_DEF_UNOP(exp ,UnaryExp);
|
GRID_DEF_UNOP(sin, UnarySin);
|
||||||
|
GRID_DEF_UNOP(cos, UnaryCos);
|
||||||
|
GRID_DEF_UNOP(asin, UnaryAsin);
|
||||||
|
GRID_DEF_UNOP(acos, UnaryAcos);
|
||||||
|
GRID_DEF_UNOP(log, UnaryLog);
|
||||||
|
GRID_DEF_UNOP(exp, UnaryExp);
|
||||||
|
|
||||||
GRID_DEF_BINOP(operator+,BinaryAdd);
|
GRID_DEF_BINOP(operator+, BinaryAdd);
|
||||||
GRID_DEF_BINOP(operator-,BinarySub);
|
GRID_DEF_BINOP(operator-, BinarySub);
|
||||||
GRID_DEF_BINOP(operator*,BinaryMul);
|
GRID_DEF_BINOP(operator*, BinaryMul);
|
||||||
|
GRID_DEF_BINOP(operator/, BinaryDiv);
|
||||||
|
|
||||||
GRID_DEF_BINOP(operator&,BinaryAnd);
|
GRID_DEF_BINOP(operator&, BinaryAnd);
|
||||||
GRID_DEF_BINOP(operator|,BinaryOr);
|
GRID_DEF_BINOP(operator|, BinaryOr);
|
||||||
GRID_DEF_BINOP(operator&&,BinaryAndAnd);
|
GRID_DEF_BINOP(operator&&, BinaryAndAnd);
|
||||||
GRID_DEF_BINOP(operator||,BinaryOrOr);
|
GRID_DEF_BINOP(operator||, BinaryOrOr);
|
||||||
|
|
||||||
GRID_DEF_TRINOP(where,TrinaryWhere);
|
GRID_DEF_TRINOP(where, TrinaryWhere);
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
// Closure convenience to force expression to evaluate
|
// Closure convenience to force expression to evaluate
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
template<class Op,class T1>
|
template <class Op, class T1>
|
||||||
auto closure(const LatticeUnaryExpression<Op,T1> & expr)
|
auto closure(const LatticeUnaryExpression<Op, T1> &expr)
|
||||||
-> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second))))>
|
-> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second))))> {
|
||||||
{
|
Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second))))> ret(
|
||||||
Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second))))> ret(expr);
|
expr);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
template<class Op,class T1, class T2>
|
template <class Op, class T1, class T2>
|
||||||
auto closure(const LatticeBinaryExpression<Op,T1,T2> & expr)
|
auto closure(const LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||||
-> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
|
-> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||||
eval(0,std::get<1>(expr.second))))>
|
eval(0, std::get<1>(expr.second))))> {
|
||||||
{
|
Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||||
Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
|
eval(0, std::get<1>(expr.second))))>
|
||||||
eval(0,std::get<1>(expr.second))))> ret(expr);
|
ret(expr);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
template<class Op,class T1, class T2, class T3>
|
template <class Op, class T1, class T2, class T3>
|
||||||
auto closure(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
|
auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||||
-> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
|
-> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||||
eval(0,std::get<1>(expr.second)),
|
eval(0, std::get<1>(expr.second)),
|
||||||
eval(0,std::get<2>(expr.second))))>
|
eval(0, std::get<2>(expr.second))))> {
|
||||||
{
|
Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||||
Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
|
eval(0, std::get<1>(expr.second)),
|
||||||
eval(0,std::get<1>(expr.second)),
|
eval(0, std::get<2>(expr.second))))>
|
||||||
eval(0,std::get<2>(expr.second))))> ret(expr);
|
ret(expr);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -353,7 +433,6 @@ template<class Op,class T1, class T2, class T3>
|
|||||||
#undef GRID_DEF_UNOP
|
#undef GRID_DEF_UNOP
|
||||||
#undef GRID_DEF_BINOP
|
#undef GRID_DEF_BINOP
|
||||||
#undef GRID_DEF_TRINOP
|
#undef GRID_DEF_TRINOP
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
@ -368,7 +447,7 @@ using namespace Grid;
|
|||||||
BinaryAdd<double,double> tmp;
|
BinaryAdd<double,double> tmp;
|
||||||
LatticeBinaryExpression<BinaryAdd<double,double>,Lattice<double> &,Lattice<double> &>
|
LatticeBinaryExpression<BinaryAdd<double,double>,Lattice<double> &,Lattice<double> &>
|
||||||
expr(std::make_pair(tmp,
|
expr(std::make_pair(tmp,
|
||||||
std::forward_as_tuple(v1,v2)));
|
std::forward_as_tuple(v1,v2)));
|
||||||
tmp.func(eval(0,v1),eval(0,v2));
|
tmp.func(eval(0,v1),eval(0,v2));
|
||||||
|
|
||||||
auto var = v1+v2;
|
auto var = v1+v2;
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/lattice/Lattice_arith.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_LATTICE_ARITH_H
|
#ifndef GRID_LATTICE_ARITH_H
|
||||||
#define GRID_LATTICE_ARITH_H
|
#define GRID_LATTICE_ARITH_H
|
||||||
|
|
||||||
|
@ -1,3 +1,33 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/lattice/Lattice_base.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_LATTICE_BASE_H
|
#ifndef GRID_LATTICE_BASE_H
|
||||||
#define GRID_LATTICE_BASE_H
|
#define GRID_LATTICE_BASE_H
|
||||||
|
|
||||||
@ -26,11 +56,14 @@ extern int GridCshiftPermuteMap[4][16];
|
|||||||
// Basic expressions used in Expression Template
|
// Basic expressions used in Expression Template
|
||||||
////////////////////////////////////////////////
|
////////////////////////////////////////////////
|
||||||
|
|
||||||
class LatticeBase {};
|
class LatticeBase
|
||||||
class LatticeExpressionBase {};
|
{
|
||||||
|
public:
|
||||||
|
virtual ~LatticeBase(void) = default;
|
||||||
|
GridBase *_grid;
|
||||||
|
};
|
||||||
|
|
||||||
template<class T> using Vector = std::vector<T,alignedAllocator<T> >; // Aligned allocator??
|
class LatticeExpressionBase {};
|
||||||
template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >; // Aligned allocator??
|
|
||||||
|
|
||||||
template <typename Op, typename T1>
|
template <typename Op, typename T1>
|
||||||
class LatticeUnaryExpression : public std::pair<Op,std::tuple<T1> > , public LatticeExpressionBase {
|
class LatticeUnaryExpression : public std::pair<Op,std::tuple<T1> > , public LatticeExpressionBase {
|
||||||
@ -59,8 +92,6 @@ template<class vobj>
|
|||||||
class Lattice : public LatticeBase
|
class Lattice : public LatticeBase
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
||||||
GridBase *_grid;
|
|
||||||
int checkerboard;
|
int checkerboard;
|
||||||
Vector<vobj> _odata;
|
Vector<vobj> _odata;
|
||||||
|
|
||||||
@ -68,13 +99,13 @@ public:
|
|||||||
int begin(void) { return 0;};
|
int begin(void) { return 0;};
|
||||||
int end(void) { return _odata.size(); }
|
int end(void) { return _odata.size(); }
|
||||||
vobj & operator[](int i) { return _odata[i]; };
|
vobj & operator[](int i) { return _odata[i]; };
|
||||||
|
const vobj & operator[](int i) const { return _odata[i]; };
|
||||||
|
|
||||||
public:
|
public:
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
typedef typename vobj::vector_type vector_type;
|
typedef typename vobj::vector_type vector_type;
|
||||||
typedef vobj vector_object;
|
typedef vobj vector_object;
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Expression Template closure support
|
// Expression Template closure support
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
@ -149,8 +180,8 @@ PARALLEL_FOR_LOOP
|
|||||||
}
|
}
|
||||||
//GridFromExpression is tricky to do
|
//GridFromExpression is tricky to do
|
||||||
template<class Op,class T1>
|
template<class Op,class T1>
|
||||||
Lattice(const LatticeUnaryExpression<Op,T1> & expr): _grid(nullptr){
|
Lattice(const LatticeUnaryExpression<Op,T1> & expr) {
|
||||||
|
_grid = nullptr;
|
||||||
GridFromExpression(_grid,expr);
|
GridFromExpression(_grid,expr);
|
||||||
assert(_grid!=nullptr);
|
assert(_grid!=nullptr);
|
||||||
|
|
||||||
@ -171,7 +202,8 @@ PARALLEL_FOR_LOOP
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
template<class Op,class T1, class T2>
|
template<class Op,class T1, class T2>
|
||||||
Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr): _grid(nullptr){
|
Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr) {
|
||||||
|
_grid = nullptr;
|
||||||
GridFromExpression(_grid,expr);
|
GridFromExpression(_grid,expr);
|
||||||
assert(_grid!=nullptr);
|
assert(_grid!=nullptr);
|
||||||
|
|
||||||
@ -192,7 +224,8 @@ PARALLEL_FOR_LOOP
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
template<class Op,class T1, class T2, class T3>
|
template<class Op,class T1, class T2, class T3>
|
||||||
Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr): _grid(nullptr){
|
Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) {
|
||||||
|
_grid = nullptr;
|
||||||
GridFromExpression(_grid,expr);
|
GridFromExpression(_grid,expr);
|
||||||
assert(_grid!=nullptr);
|
assert(_grid!=nullptr);
|
||||||
|
|
||||||
@ -212,14 +245,29 @@ PARALLEL_FOR_LOOP
|
|||||||
// Constructor requires "grid" passed.
|
// Constructor requires "grid" passed.
|
||||||
// what about a default grid?
|
// what about a default grid?
|
||||||
//////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////
|
||||||
Lattice(GridBase *grid) : _grid(grid), _odata(_grid->oSites()) {
|
Lattice(GridBase *grid) : _odata(grid->oSites()) {
|
||||||
// _odata.reserve(_grid->oSites());
|
_grid = grid;
|
||||||
// _odata.resize(_grid->oSites());
|
// _odata.reserve(_grid->oSites());
|
||||||
|
// _odata.resize(_grid->oSites());
|
||||||
// std::cout << "Constructing lattice object with Grid pointer "<<_grid<<std::endl;
|
// std::cout << "Constructing lattice object with Grid pointer "<<_grid<<std::endl;
|
||||||
assert((((uint64_t)&_odata[0])&0xF) ==0);
|
assert((((uint64_t)&_odata[0])&0xF) ==0);
|
||||||
checkerboard=0;
|
checkerboard=0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Lattice(const Lattice& r){ // copy constructor
|
||||||
|
_grid = r._grid;
|
||||||
|
checkerboard = r.checkerboard;
|
||||||
|
_odata.resize(_grid->oSites());// essential
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
|
_odata[ss]=r._odata[ss];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
virtual ~Lattice(void) = default;
|
||||||
|
|
||||||
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
|
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<_grid->oSites();ss++){
|
for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
@ -230,7 +278,7 @@ PARALLEL_FOR_LOOP
|
|||||||
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
||||||
this->checkerboard = r.checkerboard;
|
this->checkerboard = r.checkerboard;
|
||||||
conformable(*this,r);
|
conformable(*this,r);
|
||||||
std::cout<<GridLogMessage<<"Lattice operator ="<<std::endl;
|
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<_grid->oSites();ss++){
|
for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
this->_odata[ss]=r._odata[ss];
|
this->_odata[ss]=r._odata[ss];
|
||||||
@ -252,17 +300,6 @@ PARALLEL_FOR_LOOP
|
|||||||
*this = (*this)+r;
|
*this = (*this)+r;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
strong_inline friend Lattice<vobj> operator / (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
|
|
||||||
conformable(lhs,rhs);
|
|
||||||
Lattice<vobj> ret(lhs._grid);
|
|
||||||
PARALLEL_FOR_LOOP
|
|
||||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
|
||||||
ret._odata[ss] = lhs._odata[ss]*pow(rhs._odata[ss],-1.0);
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
};
|
|
||||||
|
|
||||||
}; // class Lattice
|
}; // class Lattice
|
||||||
|
|
||||||
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
||||||
@ -287,27 +324,27 @@ PARALLEL_FOR_LOOP
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include <lattice/Lattice_conformable.h>
|
#include "Lattice_conformable.h"
|
||||||
#define GRID_LATTICE_EXPRESSION_TEMPLATES
|
#define GRID_LATTICE_EXPRESSION_TEMPLATES
|
||||||
#ifdef GRID_LATTICE_EXPRESSION_TEMPLATES
|
#ifdef GRID_LATTICE_EXPRESSION_TEMPLATES
|
||||||
#include <lattice/Lattice_ET.h>
|
#include "Lattice_ET.h"
|
||||||
#else
|
#else
|
||||||
#include <lattice/Lattice_overload.h>
|
#include "Lattice_overload.h"
|
||||||
#endif
|
#endif
|
||||||
#include <lattice/Lattice_arith.h>
|
#include "Lattice_arith.h"
|
||||||
#include <lattice/Lattice_trace.h>
|
#include "Lattice_trace.h"
|
||||||
#include <lattice/Lattice_transpose.h>
|
#include "Lattice_transpose.h"
|
||||||
#include <lattice/Lattice_local.h>
|
#include "Lattice_local.h"
|
||||||
#include <lattice/Lattice_reduction.h>
|
#include "Lattice_reduction.h"
|
||||||
#include <lattice/Lattice_peekpoke.h>
|
#include "Lattice_peekpoke.h"
|
||||||
#include <lattice/Lattice_reality.h>
|
#include "Lattice_reality.h"
|
||||||
#include <lattice/Lattice_comparison_utils.h>
|
#include "Lattice_comparison_utils.h"
|
||||||
#include <lattice/Lattice_comparison.h>
|
#include "Lattice_comparison.h"
|
||||||
#include <lattice/Lattice_coordinate.h>
|
#include "Lattice_coordinate.h"
|
||||||
#include <lattice/Lattice_where.h>
|
#include "Lattice_where.h"
|
||||||
#include <lattice/Lattice_rng.h>
|
#include "Lattice_rng.h"
|
||||||
#include <lattice/Lattice_unary.h>
|
#include "Lattice_unary.h"
|
||||||
#include <lattice/Lattice_transfer.h>
|
#include "Lattice_transfer.h"
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,3 +1,31 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/lattice/Lattice_comparison.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_LATTICE_COMPARISON_H
|
#ifndef GRID_LATTICE_COMPARISON_H
|
||||||
#define GRID_LATTICE_COMPARISON_H
|
#define GRID_LATTICE_COMPARISON_H
|
||||||
|
|
||||||
|
@ -1,3 +1,31 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/lattice/Lattice_comparison_utils.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_COMPARISON_H
|
#ifndef GRID_COMPARISON_H
|
||||||
#define GRID_COMPARISON_H
|
#define GRID_COMPARISON_H
|
||||||
|
|
||||||
|
@ -1,3 +1,30 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/lattice/Lattice_conformable.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_LATTICE_CONFORMABLE_H
|
#ifndef GRID_LATTICE_CONFORMABLE_H
|
||||||
#define GRID_LATTICE_CONFORMABLE_H
|
#define GRID_LATTICE_CONFORMABLE_H
|
||||||
|
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user