1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-14 22:07:05 +01:00

Compare commits

..

358 Commits

Author SHA1 Message Date
d0c2c9c71f Merge branch 'develop' of https://github.com/paboyle/Grid into bugfix/dminus 2017-04-04 15:20:17 -04:00
c8cafa77ca Checking in the latest Lacnzos 2017-04-04 15:18:12 -04:00
5592f7b8c1 Creation mode better implementation 2017-04-05 02:35:34 +09:00
35da4ece0b UID fix 2017-04-05 02:18:15 +09:00
061b15b9e9 Merge branch 'feature/sitmo-skipahead' into develop 2017-04-05 01:24:49 +09:00
561426f6eb Clean up 2017-04-02 23:13:48 +09:00
83f6fab8fa Big/Small crush test, and fast SITMO rng init, faster but not ideal
MT and Ranlux init.
2017-04-02 12:10:51 +09:00
0fade84ab2 No random device 2017-04-02 00:29:40 +09:00
9dc7ca4c3b Sitmo fast init 2017-04-02 00:28:22 +09:00
935d82f5b1 sanity checks 2017-04-02 00:27:28 +09:00
9cbcdd65d7 No random device seed 2017-04-02 00:26:57 +09:00
f18f5ed926 Drop random device 2017-04-02 00:26:26 +09:00
d1d63a4f2d sitmo default 2017-04-02 00:26:05 +09:00
7e5faa0f34 Multiple RNGs 2017-04-02 00:25:44 +09:00
6af459cae4 Christoph's coefficients. 2017-03-31 17:07:43 +09:00
1c4bc7ed38 Debugged staggered conventions 2017-03-31 14:41:48 +09:00
a3bcad3804 Added preconditioned SYM2 solver (SchurRedBlackDiagTwoSolve) 2017-03-30 20:33:27 -04:00
5a5b66292b Merge branch 'develop' of https://github.com/paboyle/Grid into bugfix/dminus 2017-03-30 10:44:02 -04:00
93ea5d9468 Pretty code 2017-03-30 15:00:03 +09:00
1ec5d32369 Chulwoo's test to zmobius helped me shake out 2017-03-30 13:45:13 +09:00
9fd23faadf Pretty layout 2017-03-30 13:44:45 +09:00
10e4fa0dc8 Template instantiation improvements 2017-03-30 13:44:25 +09:00
c4aca1dde4 Conjugate coefficients on adjoint 2017-03-30 13:44:05 +09:00
b9e8ea3aaa conjugate coefficient on the dagger 2017-03-30 13:43:13 +09:00
077aa728b9 Fix the ZMobius (I think) 2017-03-30 13:42:09 +09:00
a8d83d886e Macro controls 2017-03-30 13:31:34 +09:00
7fd46eeec4 Trailing whitespace removal 2017-03-30 13:31:10 +09:00
e0c4eeb3ec Compiles again 2017-03-30 13:30:45 +09:00
cb9a297a0a Chulwoo's Zmobius test 2017-03-30 13:30:25 +09:00
2b115929dc Small AVX512 asm ifdef patch 2017-03-29 18:51:23 +09:00
5c6571dab1 Merge branch 'feature/bgq-asm' into develop 2017-03-29 18:48:55 +09:00
417ec56cca Release candidate 2017-03-29 05:45:33 -04:00
756bc25008 Verbose header print by default 2017-03-29 04:44:17 -04:00
35695ba57a Bug fix in MPI3 2017-03-29 04:43:55 -04:00
81ead48850 Log any errors to a file 2017-03-29 04:39:52 -04:00
d805867e02 Better init 2017-03-28 13:25:05 -04:00
e55a751e23 Merge branch 'feature/bgq-asm' of https://github.com/paboyle/Grid into feature/bgq-asm 2017-03-28 12:20:12 -04:00
358eb75995 Shorten loop 2017-03-28 12:20:02 -04:00
98f9318279 Build on AVX2 and MPI passing with clang++ 2017-03-28 23:16:04 +09:00
4b17e8eba8 Merge branch 'develop' into feature/bgq-asm
Conflicts:
	lib/qcd/action/fermion/Fermion.h
	lib/qcd/action/fermion/WilsonFermion.cc
	lib/util/Init.cc
	tests/Test_cayley_even_odd_vec.cc
2017-03-28 04:49:30 -04:00
e63be32ad2 zmobius Meooe5D fixed? 2017-03-28 03:48:50 -04:00
75112a632a IO improvements to fail on IO error 2017-03-28 02:28:04 -04:00
18bde08d1b Merge branch 'feature/staggering' into develop 2017-03-28 15:25:55 +09:00
6aa106d906 Fixing zmobius coeffs again 2017-03-27 21:57:07 -04:00
33d59c8869 Adding Zmobius prec test 2017-03-27 21:40:27 -04:00
a833fd8dbf Merge branch 'develop' of https://github.com/paboyle/Grid into bugfix/dminus 2017-03-27 21:37:26 -04:00
d45cd7e677 Adding a simple read of NERSC test 2017-03-26 09:24:26 -04:00
4e96679797 Added a bnl log 2017-03-25 09:25:46 -04:00
fc93f0b2ec Save some code for static huge tlb's. It is ifdef'ed out but an interesting root only experiment.
No gain from it.
2017-03-21 22:30:29 -04:00
8c8473998d Average over whole cluster the comm time. 2017-03-21 22:29:51 -04:00
e9712bc7fb Zmobius test was wrong! (only mobius) checking in again 2017-03-16 23:04:28 -04:00
e7c36771ed ZMobius prep for asm 2017-03-15 14:23:33 -04:00
8dc57a1e25 Layout change 2017-03-13 11:11:46 +00:00
f57bd770b0 Merge branch 'bugfix/dminus' into feature/bgq-asm 2017-03-13 11:11:03 +00:00
4ed10a3d06 Merge branch 'develop' into feature/bgq-asm 2017-03-13 11:10:10 +00:00
dfefc70b57 Merge pull request #93 from Lanny91/hotfix/qpx
Some fixes for QPX and generic SIMD types.
2017-03-13 09:31:26 +00:00
0b61f75c9e Adding ZMobius CG test 2017-03-13 00:12:43 -04:00
33edde245d Changing Dminus(Dag) to use full vectors to work correctly 2017-03-12 23:02:42 -04:00
b64e004555 MPI run fail on macos 2017-03-13 01:59:01 +00:00
447c5e6cd7 Z mobius hermiticity correction 2017-03-13 01:30:43 +00:00
8b99d80d8c Merge branch 'bgq-asm-shmemfixes' into feature/bgq-asm 2017-03-12 23:30:09 +00:00
3901b17ade timeings from BNL 2017-02-28 17:06:45 -05:00
af230a1fb8 Average the time across the whole machine for outliers 2017-02-28 17:05:22 -05:00
06a132e3f9 Fixes to SHMEM comms 2017-02-28 13:31:54 -08:00
96d44d5c55 Header fix 2017-02-24 19:12:11 -05:00
7fe797daf8 SIMD vector length sanity checks 2017-02-23 16:49:44 +00:00
486a01294a Corrected QPX SIMD width 2017-02-23 16:47:56 +00:00
586a7c90b7 Merge branch 'develop' into feature/bgq-asm 2017-02-23 00:26:59 +00:00
e099dcdae7 Merge branch 'develop' into feature/bgq-asm 2017-02-23 00:25:29 +00:00
4e7ab3166f Refactoring header layout 2017-02-22 18:09:33 +00:00
aac80cbb44 Bug fix from Chris K 2017-02-22 12:19:09 -05:00
c80948411b Added tRotate function and MaddRealPart struct for generic SIMD, bugfix in MultRealPart and minor cosmetic changes. 2017-02-22 14:57:10 +00:00
95625a7bd1 Use Grid Integer type 2017-02-22 13:09:32 +00:00
0796696733 Emulated integer vector type for QPX and generic SIMD instruction sets. 2017-02-22 12:01:36 +00:00
cc773ae70c Merge pull request #89 from sunpho84/prepend_package_with_grid
Prepending PACKAGE_ with GRID_ in Config.h
2017-02-22 00:52:10 +00:00
d21c51b9be Merge pull request #88 from sunpho84/pickpoketting
now it is possible to pass {coords list} to a peek or poke
2017-02-22 00:51:33 +00:00
597a7b4b3a Merge pull request #81 from edbennett/develop
Fix misleading message: "doxygen-pdf requires doxygen-pdf"
2017-02-22 00:50:59 +00:00
1c30e9a961 Verified 2017-02-21 23:01:25 +00:00
041884acf0 Prepending PACKAGE_ with GRID_ in Config.h
Avoid polluting linking progr
2017-02-21 22:51:36 +01:00
15e668eef1 now it is possible to pass {coords list} to a peek or poke 2017-02-21 22:48:38 +01:00
bf7e3f20d4 Staggaered fermion optimised version 2017-02-21 14:35:42 +00:00
3ae92fa2e6 Global changes to parallel_for structure.
Move the comms flags to more sensible names
2017-02-21 05:24:27 -05:00
3906cd2149 Stencil fix on BNL KNL system 2017-02-20 17:51:31 -05:00
5a1fb29db7 Useful debug code info to preserve 2017-02-20 17:49:23 -05:00
661fc4d3d1 Debug AVX512 exchange code paths 2017-02-20 17:48:36 -05:00
41009cc142 Move excange into the stencil only; keep Cshift fully general 2017-02-20 17:48:04 -05:00
37720c4db7 Count bytes off node only 2017-02-20 17:47:40 -05:00
1a30455a10 1000 iters on bmark for more accurate timing 2017-02-20 17:47:01 -05:00
cd0da81196 Merge branch 'feature/bgq-asm' of https://github.com/paboyle/Grid into feature/bgq-asm 2017-02-16 18:52:30 -05:00
f246fe3304 Improvements to avx for invertible to avoid latent bug 2017-02-16 23:52:44 +00:00
8a29c16bde Faster gather exchange 2017-02-16 23:52:22 +00:00
d68907fc3e Debug temp 2017-02-16 18:51:35 -05:00
5c0adf7bf2 Make clang happy with parenthesis 2017-02-16 23:51:33 +00:00
be3a8249c6 Faster gather 2017-02-16 23:51:15 +00:00
bd600702cf Vectorise the XYZT face gathering better.
Hard coded for simd_layout <= 2 in any given spread out direction; full generality is inconsistent
with efficiency.
2017-02-15 11:11:04 +00:00
aca7a3ef0a Optimisation control improvements 2017-02-10 18:22:31 -05:00
2c246551d0 Overlap comms and compute options in wilson kernels 2017-02-07 01:37:10 -05:00
71ac2e7940 Faster RNG init 2017-02-07 01:33:23 -05:00
2bf4688e83 Running on BNL KNL 2017-02-07 01:32:10 -05:00
a48ee6f0f2 Don't use MPI3_leader any more. No real gain and complex 2017-02-07 01:31:24 -05:00
73547cca66 MPI3 working i think 2017-02-07 01:30:02 -05:00
123c673db7 Policy to control async or sync SendRecv 2017-02-07 01:24:54 -05:00
61f82216e2 Communicator Policy, NodeCount distinct from Rank count 2017-02-07 01:22:53 -05:00
8e7ca92278 Debugged cshift case 2017-02-07 01:21:32 -05:00
485ad6fde0 Stencil working in SHM MPI3 2017-02-07 01:20:39 -05:00
6ea2184e18 OMP define change 2017-02-07 01:17:16 -05:00
fdc170b8a3 Parallel fors in lattice transfer 2017-02-07 01:16:39 -05:00
060da786e9 Comms benchmark improvements 2017-02-07 01:07:39 -05:00
85c7bc4321 Bug fixes for cases that physics code couldn't hit but latent
and discovered on KNL (long vector, y SIMD dir) and checker dir set to y.
Remove the assertions on these code paths now they are tested.
2017-02-07 01:01:15 -05:00
0883d6a7ce Overlap comms compute support; make reg naming consistent with bgq aasm 2017-02-07 00:59:32 -05:00
9ff97b4711 Improved stencil tests passing all on KNL multinode 2017-02-07 00:58:34 -05:00
b5e9c900a4 Better printing and signal handling options 2017-02-07 00:57:55 -05:00
4bbdfb434c Overlap comms compute modifications 2017-02-07 00:57:01 -05:00
c94133af49 Added iteration reporting to CG and mixed CG
Added ability to manually change the initial CG inner tolerance in mixed CG
Added .hpp files to filelist script
2017-02-02 17:04:42 -05:00
e7d8030a64 operator>> for serialisable enums 2017-02-01 15:51:08 -08:00
d775fbb2f9 Gammas: code cleaning and gamma_L implementation & test 2017-02-01 15:45:05 -08:00
863855f46f header fix 2017-02-01 11:59:44 -08:00
419af7610d New gamma matrices tidying: generated code is confined to Gamma.* for readability 2017-02-01 11:23:12 -08:00
7da7d263c4 typo 2017-01-30 10:53:13 -08:00
1140573027 Gamma adj fix: now in Grid namespace to avoid collisions 2017-01-30 10:53:04 -08:00
a0cfbb6e88 Merge branch 'feature/gammas' into feature/hadrons
# Conflicts:
#	.gitignore
#	lib/qcd/spin/Dirac.cc
#	scripts/filelist
2017-01-30 09:10:49 -08:00
515a26b3c6 gammas: copyright update 2017-01-30 09:07:09 -08:00
c946d3bf3f Merge branch 'develop' of github.com:edbennett/Grid into develop 2017-01-27 22:12:11 +00:00
1c68098780 fix misleading message: "doxygen-pdf requires doxygen-pdf" 2017-01-27 22:04:26 +00:00
899e685627 Merge branch 'feature/sitmo_rng' into develop 2017-01-27 14:15:56 +00:00
3bf993d81a gitignore update 2017-01-26 17:00:59 -08:00
fad743fbb1 Build system sanity check: corrected several headers not in the <Grid/*> format 2017-01-26 17:00:41 -08:00
ef8d3831eb Temporary patch the threading error in InsertSlice and ExtractSlice
Find source and fix the error
2017-01-25 18:12:04 +00:00
70ed9fc40c Updating the engine to the last version 2017-01-25 18:10:41 +00:00
4d3787db65 Hadrons fixed for new gammas, Meson only does one contraction but this’ll change in the future 2017-01-25 09:59:00 -08:00
677757cfeb Added and tested SITMO PRNG 2017-01-25 12:47:22 +00:00
05cb6d318a gammas: adjoint implemented as a symbolic operation 2017-01-24 18:07:43 -08:00
0432e30256 Gamma right multiply code fix (now passes consistency check) 2017-01-24 17:36:23 -08:00
2c3ebc1e07 .gitignore update 2017-01-24 17:35:42 -08:00
068b28af2d Extensive gamma test program 2017-01-24 17:35:29 -08:00
f7db342f49 Serialisable enums can be converted to int 2017-01-24 17:33:26 -08:00
d65e81518f Merge branch 'feature/hadrons' into develop 2017-01-24 09:21:44 -08:00
a37e71f362 New automatic implementation of gamma matrices, Meson and SeqGamma are broken 2017-01-23 19:13:43 -08:00
05c1924819 Timing loop change 2017-01-23 10:43:45 +00:00
b7da264b0a Hadrons: Application is not storing the environment ref but calling getInstance() each time, solving a very nasty set fault on Linux/KNL 2017-01-21 13:40:23 -08:00
74ac2aa676 Merge branch 'feature/serialisation-hdf5' into feature/hadrons 2017-01-20 14:03:51 -08:00
4c75095c61 HDF5: header fix 2017-01-20 12:14:01 -08:00
afa095d33d HDF5: better complex number support 2017-01-20 12:10:41 -08:00
6b5259cc10 HDF5 detects if a name is a dataset or not without using exception catching 2017-01-20 11:03:19 -08:00
7423a352c5 HDF5: typos 2017-01-19 18:33:04 -08:00
81e66d6631 HDF5: revert back to native types 2017-01-19 18:24:53 -08:00
ade1058e5f Hdf5Type does not need to be a pointer anymore 2017-01-19 18:23:55 -08:00
6eea9e4da7 HDF5 types static initialisation is mysteriously buggy on BG/Q, changing strategy 2017-01-19 18:02:53 -08:00
2c673666da Standardisation of HDF5 types 2017-01-19 17:19:12 -08:00
d6401e6d2c Merge branch 'feature/hadrons' into develop 2017-01-19 14:10:01 -08:00
24d3d31b01 Genetic scheduler: uses insert instead of emplace for better compiler compatibility 2017-01-19 14:08:22 -08:00
5405526424 Code typo 2017-01-18 22:42:19 -08:00
f3f0b6fef9 serious rewriting of Test_serialisation, now crashes if IO inconsistent 2017-01-18 17:41:05 -08:00
654e0b0fd0 Serialisable object are now comparable with == 2017-01-18 17:40:32 -08:00
4be08ebccc debug code cleaning 2017-01-18 17:39:59 -08:00
f599cb5b17 HDF5 serial IO implemented and tested 2017-01-18 16:50:21 -08:00
a4a509497a Merge branch 'develop' of github.com:paboyle/Grid into develop 2017-01-17 16:22:22 -08:00
5803933aea First implementation of HDF5 serial IO writer, reader is still empty 2017-01-17 16:21:18 -08:00
7cf833dfe9 Fixed compilation error in tests hadrons (capital letter in dir name) 2017-01-17 11:00:54 +00:00
91a3534054 Lattice slice utilities now thread safe 2017-01-16 06:32:25 +00:00
16a8e3d0d4 gitignore update for ST3 2017-01-16 06:32:05 +00:00
41df1db811 Hadrons: number of dimensions entirely determined by the initial grid 2017-01-11 18:37:49 +00:00
c3b6d573b9 Merge branch 'feature/bgq-asm' of https://github.com/paboyle/Grid into feature/bgq-asm 2016-12-30 22:42:17 +00:00
1e179c903d Worried about integer; suspect where statements are broken 2016-12-27 17:46:38 +00:00
669cfca9b7 No inline 2016-12-27 17:45:40 +00:00
ff2f559a57 Remove inline on gather optimised path 2016-12-27 17:45:19 +00:00
03c81bd902 Merge branch 'feature/bgq-asm' of https://github.com/paboyle/Grid into feature/bgq-asm 2016-12-27 11:25:35 +00:00
a869addef1 Stats switch off 2016-12-27 11:25:22 +00:00
1caa3fbc2d LOCK UNLOCK only 2016-12-27 11:24:45 +00:00
3d21297bbb Call the fast path compressor for wilson kernels to avoid if else on projector 2016-12-27 11:23:13 +00:00
25efefc5b4 Back to original thread policy post test 2016-12-23 09:49:04 +00:00
eabf316ed9 BGQ performance ASM 2016-12-22 21:56:08 +00:00
04ae7929a3 BGQ or KNL assembler now 2016-12-22 17:53:22 +00:00
caba0d42a5 L1p controls 2016-12-22 17:52:55 +00:00
9ae81c06d2 L1p controls for BG/Q 2016-12-22 17:52:21 +00:00
0903c48caa Hot start SU3 2016-12-22 17:51:45 +00:00
7dc36628a1 QPX finishing 2016-12-22 17:50:48 +00:00
b8cdb3e90a Debug hack; raises from 62GF/s to 72 GF/s per node on BG/Q 2016-12-22 17:50:14 +00:00
5241245534 Default to static scheduling 2016-12-22 17:49:21 +00:00
960316e207 type conversion in printf 2016-12-22 17:27:01 +00:00
3215ae6b7e Hadrons: genetic scheduler crashes in multi-thread with 1 module, multi-threading deactivated for now 2016-12-22 00:26:30 +01:00
7a85fddc7e Hadrons: modification of registration mechanism to allow for persistent caches 2016-12-22 00:25:36 +01:00
f8d11ff673 better serialisable enums (can be encapsulated into classes) 2016-12-20 12:31:49 +01:00
3f2d53a994 BGQ assembler beginning 2016-12-20 10:21:26 +00:00
8a337f3070 Move cayley into mainstream tests 2016-12-18 02:35:31 +00:00
a59f5374d7 Evade warning 2016-12-18 02:23:55 +00:00
4b220972ac Warning fix 2016-12-18 02:14:17 +00:00
629f43e36c Return statement needed 2016-12-18 02:09:37 +00:00
a3172b3455 Precision error 2016-12-18 02:07:45 +00:00
3e6945cd65 Fixing AVX Z-mobius 2016-12-18 02:05:11 +00:00
87be03006a AVX 512 code broke other compiles; fixing 2016-12-18 01:45:09 +00:00
f17436fec2 Bad commit fixed 2016-12-18 01:27:34 +00:00
4d8b01b7ed Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2016-12-18 00:56:57 +00:00
fa6acccf55 Zmobius asm 2016-12-18 00:56:19 +00:00
55cb22ad67 Z mobius bmark 2016-12-18 00:55:37 +00:00
df9108154d Debugged 2 versions of assembler; ls vectorised, xyzt vectorised 2016-12-17 23:47:51 +00:00
b3e7f600da Partial implementation of 4d vectorisation assembler 2016-12-16 23:50:30 +00:00
d4071daf2a Template specialise 2016-12-16 22:28:29 +00:00
a2a6329094 AVX512 only for ASM compilation 2016-12-16 22:03:29 +00:00
eabc577940 Assembler possibly working 2016-12-16 16:55:36 +00:00
67d72000e7 Hadrons: more legal banner fixes 2016-12-15 18:26:39 +00:00
80cef1c78f Hadrons: legal banner fix 2016-12-15 18:21:52 +00:00
91e98b1dd5 Merge branch 'feature/hadrons' into develop 2016-12-15 18:15:56 +00:00
b791c274b0 Revert "AVX: uninitialised variable fix"
This reverts commit c22c3db9ad.
2016-12-15 18:15:35 +00:00
596dd570c7 Linux linking fix 2016-12-15 12:26:53 +00:00
cad158e42f Hadrons: tests improvement 2016-12-14 19:41:51 +00:00
f63fac0c69 Hadrons: the XML runner can use a precomputed schedule 2016-12-14 19:41:30 +00:00
ab92de89ab Hadrons: utility to schedule a run 2016-12-14 19:41:04 +00:00
846272b037 Hadrons: option to save and load a schedule 2016-12-14 19:40:36 +00:00
f3e49e4b73 Hadrons: module templates update 2016-12-14 18:19:46 +00:00
decbb61ec1 Hadrons: XML driven program is again a binary installed with Grid 2016-12-14 18:19:24 +00:00
7e2482aad1 Hadrons: cpde cleaning 2016-12-14 18:04:21 +00:00
e1653a9f94 Hadrons: size fix in DWF module 2016-12-14 18:02:36 +00:00
ea40854e0b Hadrons: type names are demangled 2016-12-14 18:02:18 +00:00
34df71e755 Hadrons: function to save an application as an XML file 2016-12-14 18:01:56 +00:00
3af663e17b Hadrons: modules remember their factory registration name 2016-12-14 17:59:45 +00:00
c22c3db9ad AVX: uninitialised variable fix 2016-12-13 19:05:58 +00:00
013e710c7d Hadrons: 3pt function test improvement 2016-12-13 19:04:43 +00:00
16693bd69d Hadrons: scheduler heuristic benchmark 2016-12-13 19:02:32 +00:00
de8f80cf94 Hadrons: genetic operators improvement 2016-12-13 19:02:05 +00:00
426197e446 Nc=3 2016-12-12 09:10:54 +00:00
99e2c1e666 Kernels options 2016-12-12 09:08:53 +00:00
1440565a10 Decrease verbosity 2016-12-12 09:08:04 +00:00
e9f0c0ea39 Staggered kernels options 2016-12-12 09:07:38 +00:00
4a87486365 Hadrons: a bit of cleaning in the scheduler 2016-12-10 21:14:13 +01:00
fe187e9ed3 Compiles and passes under ZMobius with assembler 2016-12-10 00:47:48 +00:00
0091b50f49 Zmobius working -- not asm yet 2016-12-09 22:51:32 +00:00
fb8d4b2357 Lots of debug on performance Mobius 2016-12-08 17:28:28 +00:00
ff71a8e847 Ready for sim 2016-12-08 17:00:32 +00:00
83fa038bdf Streaming stores 2016-12-08 16:58:42 +00:00
7a61feb6d3 Allocator added with caching for Linux VM subsystem optimisation 2016-12-08 16:58:01 +00:00
69ae817d1c Updates for supporting Mobius better 2016-12-08 16:43:28 +00:00
51322da6f8 Hadrons: genetic scheduler improvement 2016-12-07 09:00:45 +09:00
49c3eeb378 Hadrons: more verbose genetic parameters 2016-12-07 08:59:58 +09:00
c56707e003 useless debug message removed 2016-12-07 08:59:20 +09:00
5b3edf08a4 Hadrons: sequential gamma source 2016-12-06 12:13:19 +09:00
bd1d1cca34 Hadrons: code cleaning 2016-12-06 12:12:59 +09:00
646b11f5c2 Hadrons: exposing scheduler settings 2016-12-06 12:12:05 +09:00
a683a0f55a Hadrons: meson tests renamed spectrum 2016-12-06 12:11:44 +09:00
e6effcfd95 Hadrons: more contractions in the spectrum test 2016-12-05 17:41:58 +09:00
aa016f61b9 Hadrons: empty baryon contractions 2016-12-05 17:26:57 +09:00
d42a1b73c4 Hadrons: code cleaning 2016-12-05 17:26:36 +09:00
d292657ef7 Hadrons: more module templates 2016-12-05 17:26:17 +09:00
d1f7c6b94e Hadrons: templatisation of the fermion implementation 2016-12-05 16:47:29 +09:00
7ae734103e Hadrons: namespace macro to tackle GCC 5 bug 2016-12-05 14:29:32 +09:00
7a1ac45679 Hadrons: configure.ac Linux typo 2016-12-05 14:00:10 +09:00
320268fe93 Hadrons: code cleaning 2016-12-05 13:57:34 +09:00
dd6fb140c5 Hadrons: big module reorganisation 2016-12-05 13:53:31 +09:00
0b4f680d28 Hadrons: meson run test 2016-12-05 11:44:58 +09:00
a69086ba1f Hadrons: application run minor fixes 2016-12-05 11:44:36 +09:00
7433eed274 Hadrons: module creation fix 2016-12-05 11:44:16 +09:00
ee5b1fe043 Hadrons: freeing object message fix 2016-12-05 09:08:45 +09:00
1540616b22 Hadrons: integer types cleanup 2016-12-05 08:53:48 +09:00
8190523e4c Hadrons: type fix in module creation 2016-12-02 11:04:34 +09:00
b5555d85a7 Hadrons: generelalised FImpl for actions 2016-12-02 11:04:15 +09:00
9ad3d3453e Hadrons is now a library, the previous XML driven program is now a test 2016-12-01 21:36:29 +09:00
d8b716d2cd Hadrons: static initialisation fixed 2016-12-01 15:43:16 +09:00
c097fd041a Merge branch 'develop' of https://github.com/paboyle/Grid into feature/staggering 2016-11-29 13:44:17 +00:00
77fb25fb29 Push 5d tests 2016-11-29 13:43:56 +00:00
389e0a77bd Staggerd Fermion 5D 2016-11-29 13:13:56 +00:00
43928846f2 first steps to make Hadrons a library 2016-11-28 16:02:15 +09:00
fabcd4179d Hadrons: propagator type coming from the fermion implementation 2016-11-28 14:02:10 +09:00
a8843c9af6 Code cleaning, the fermion implementation can be sepcified using the macro FIMPL 2016-11-27 16:47:22 +09:00
7a1a7a685e Merge branch 'feature/fft-opt' into feature/hadrons 2016-11-27 15:32:03 +09:00
95f43d27ae Merge branch 'develop' of https://github.com/paboyle/Grid into feature/staggering 2016-11-22 13:49:22 +00:00
668ca57702 Merge branch 'develop' of https://github.com/paboyle/Grid into feature/staggering 2016-11-22 13:49:11 +00:00
1aa695cd78 Hadrons: merge typo 2016-11-10 18:38:30 +00:00
13a8997789 Merge branch 'release/v0.6.0' into feature/hadrons
# Conflicts:
#	Makefile.am
2016-11-08 20:43:39 +00:00
ee686a7d85 Compiles now 2016-11-03 16:58:23 +00:00
1c5b7a6be5 Staggered phases first cut, c1, c2, u0 2016-11-03 16:26:56 +00:00
164d3691db Staggered 2016-11-01 14:24:22 +00:00
a034e9901b Merge branch 'develop' into feature/hadrons 2016-09-20 13:49:33 +01:00
7ff7c7d90d Merge branch 'develop' into feature/hadrons 2016-08-04 16:22:10 +01:00
a2e9430abe Hadrons: fix after build system update 2016-08-03 17:14:32 +01:00
2485ef9c9c Merge branch 'feature/new-build' into feature/hadrons
# Conflicts:
#	Makefile.am
#	scripts/copyright
2016-08-03 16:49:16 +01:00
e0b7004f96 Merge branch 'master' into feature/hadrons 2016-07-01 15:54:34 +01:00
75fc295f6e Merge branch 'hadrons' into feature/hadrons 2016-06-14 17:51:15 +01:00
0b731b5d80 Hadrons: genetic scheduler parameter fix 2016-06-06 17:46:53 +01:00
8e2078be71 Hadrons: environment with fully generic object store 2016-06-06 17:45:37 +01:00
1826ed06a3 Merge branch 'master' into hadrons 2016-05-27 16:50:31 +01:00
3ff96c502b Merge branch 'master' into hadrons 2016-05-12 19:24:18 +01:00
15a0908bfc Merge branch 'master' into hadrons 2016-05-12 18:35:46 +01:00
bb2125962b Hadrons: finished implementation of 5D quarks 2016-05-12 18:34:42 +01:00
232fda5fe1 Hadrons: DWF action 2016-05-12 18:34:10 +01:00
2b31bf61ff Hadrons: message fix 2016-05-12 18:33:49 +01:00
afe5a94745 Hadrons: getModule with upcast 2016-05-12 18:33:36 +01:00
7ae667c767 Hadrons: module template update 2016-05-12 18:33:08 +01:00
07f0b69784 Merge branch 'master' into hadrons 2016-05-12 13:02:18 +01:00
5c06e89d69 Hadrons: code cleaning 2016-05-12 12:49:49 +01:00
3d75e0f0d1 Hadrons: MQuark fix 2016-05-12 12:02:15 +01:00
362f255100 Hadrons: module parameters can now be accessed from outside 2016-05-12 11:59:28 +01:00
3d78ed03ef Merge branch 'master' into hadrons 2016-05-11 15:21:46 +01:00
835003b3c5 Hadrons: removed useless gauge global parameters 2016-05-11 15:01:52 +01:00
328d213c9e Hadrons: FS case sensitivity fix 2016-05-11 14:44:14 +01:00
56a8d7a5bc Hadrons: build system fix 2016-05-11 10:27:14 +01:00
78198d1b04 Hadrons: size fix for module graph with one vertex 2016-05-10 20:13:28 +01:00
84fa2bdce6 Hadrons: modules moved in their own directory & utility script to add new modules 2016-05-10 20:12:48 +01:00
29dfe99e7c Hadrons: more scheduler optimizations 2016-05-10 19:19:38 +01:00
d604580e5a Hadrons: all objects/modules mapped to an integer address system to remove string operations from scheduling 2016-05-10 19:07:41 +01:00
7dfdc9baa0 Hadrons: lattice dynamic cast fix 2016-05-10 10:41:20 +01:00
9e986654e6 Hadrons: first version of the genetic scheduler 2016-05-09 14:49:06 +01:00
df3fbc477e Hadrons: code cleaning 2016-05-07 13:26:56 -07:00
bb580ae077 Hadrons: significant overhaul of the object registration system, previous version didn't allow dry runs 2016-05-07 13:19:38 -07:00
2c226753ab Hadrons: comments on graph theory algorithm complexity 2016-05-06 06:35:11 -07:00
ea0cea668e Hadrons: minor code cleaning 2016-05-05 16:13:14 -07:00
75cd72a421 Hadrons: memory management for fermion matrices, dynamic ownership in garbage collector 2016-05-04 19:11:03 -07:00
cbe52b0659 Hadrons: debug message removed 2016-05-04 12:20:33 -07:00
3aa6463ede Hadrons: general lattice store & a lot of code cleaning 2016-05-04 12:17:27 -07:00
312637e5fb Merge branch 'master' into hadrons
# Conflicts:
#	lib/Log.h
2016-05-04 12:16:18 -07:00
798d8f7340 Hadrons: Modules: better log messages 2016-05-03 18:17:58 -07:00
ba878724ce Hadrons: sources are now independent modules 2016-05-03 18:17:28 -07:00
b865dd9da8 Hadrons: solver renaming 2016-05-03 18:16:57 -07:00
8b313a35ac Hadrons: random and NERSC gauge configurations 2016-05-03 17:08:42 -07:00
02ec23cdad Hadrons: Fermion actions and gauge fields are modules now 2016-05-03 17:08:42 -07:00
6e83b6a203 Hadrons: namespace reorganisation, now everything is in Grid::Hadrons, the 'using Grid::operator<<' statement is used to prevent a very nasty compilation error with GCC. 2016-05-02 19:31:21 -07:00
48fcc34d72 CMeson: first implementation, still need proper output 2016-05-01 18:31:40 -07:00
d08d93c44c Merge branch 'master' into hadrons 2016-05-01 18:30:44 -07:00
0ab10cdedb Merge branch 'master' into hadrons 2016-05-01 16:08:05 -07:00
22653edf12 Merge branch 'master' into hadrons 2016-05-01 15:55:58 -07:00
12d2a95846 Merge branch 'master' into hadrons 2016-05-01 15:05:02 -07:00
978cf52f6b Merge branch 'master' into hadrons 2016-05-01 14:53:38 -07:00
63b730de80 Hadrons: for the moment, test with unit gauge 2016-05-01 14:50:57 -07:00
7905c5b8e5 Hadrons: Z2 source code fix 2016-05-01 14:49:45 -07:00
5e4b58ac40 Hadrons: Z2 source expression fix 2016-05-01 12:49:26 -07:00
468d8dc682 Merge branch 'master' into hadrons 2016-05-01 12:03:24 -07:00
beb11fd4ef Merge branch 'master' into hadrons 2016-05-01 10:32:24 -07:00
d7662b5175 Merge branch 'master' into hadrons 2016-04-30 00:24:59 -07:00
dc5f32e5f0 Merge branch 'master' into hadrons 2016-04-30 00:18:31 -07:00
1869d28429 Hadrons: first prototype with working inversions 2016-04-30 00:17:04 -07:00
405b175665 Merge branch 'master' into hadrons 2016-04-30 00:16:06 -07:00
e33b0f6ff7 cleaner output 2016-04-16 08:41:53 +01:00
9ee54e0db7 debug output removed 2016-04-16 08:41:28 +01:00
feae35d92c Hadrons: pass strings by value 2016-04-16 08:41:12 +01:00
3834d81181 Merge branch 'master' into hadrons 2016-04-14 15:15:45 +01:00
179e82b5ca Merge branch 'master' into hadrons 2016-03-08 12:55:33 +00:00
f2c59c8730 Merge branch 'master' into hadrons 2016-03-02 17:15:05 +00:00
fdd0848593 Hadrons: license text update 2016-02-25 12:07:21 +00:00
92f666905f copyright script update to 80 column text 2016-02-25 12:06:24 +00:00
5980fa8640 test implementation of DWF inverter 2016-02-25 11:56:16 +00:00
a0d8eb2c24 minor code cleaning 2016-02-23 16:33:00 +00:00
1e10b4571d fix after Grid update 2016-02-23 16:21:45 +00:00
02f8b84ac9 Merge branch 'master' into hadrons 2016-02-23 16:13:39 +00:00
cfd368596d Merge branch 'master' into hadrons 2016-02-22 15:25:02 +00:00
ae682674e0 Hadrons: first full implementation of the scheduler 2016-01-13 20:23:51 -08:00
17c43f49ac Hadrons: application class now take parameter file name as argument 2016-01-13 20:22:37 -08:00
30146e977c gitignore update 2016-01-13 20:20:43 -08:00
54eacec261 Hadrons: namespace std not used anymore in compiled sources 2015-12-23 14:30:33 +00:00
76c78f04e2 Hadrons: first complete prototype for run loop 2015-12-23 14:21:35 +00:00
379580cd89 Merge branch 'master' into hadrons 2015-12-23 14:20:22 +00:00
14a80733f9 Merge branch 'master' into hadrons 2015-12-08 13:57:53 +00:00
d4db009a58 Hadrons: starting scheduler implementation 2015-12-07 18:26:38 +00:00
20ce7e0270 Hadrons: algorithm to determine all possible topological ordering 2015-12-07 15:46:36 +00:00
bb195607ab Hadrons: fix in topological sort algorithm name 2015-12-02 19:40:11 +00:00
6f090e22c0 Hadrons: graph topological sort 2015-12-02 19:33:34 +00:00
339e983172 Merge branch 'master' into hadrons 2015-12-02 14:38:04 +00:00
4a7f3d1b7b Merge branch 'master' into hadrons
# Conflicts:
#	configure
2015-12-02 10:57:51 +00:00
c4e2202550 First graph class implementation and test 2015-11-05 14:28:14 +00:00
538b16610b First commit for measurement software 'Hadrons' 2015-10-27 17:33:18 +00:00
299 changed files with 40558 additions and 5193 deletions

15
.gitignore vendored
View File

@ -9,6 +9,7 @@
################
*~
*#
*.sublime-*
# Precompiled Headers #
#######################
@ -103,4 +104,16 @@ lib/fftw/*
# libtool macros #
##################
m4/lt*
m4/libtool.m4
m4/libtool.m4
# Buck files #
##############
.buck*
buck-out
BUCK
make-bin-BUCK.sh
# generated sources #
#####################
lib/qcd/spin/gamma-gen/*.h
lib/qcd/spin/gamma-gen/*.cc

View File

@ -102,5 +102,5 @@ script:
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto
- make -j4
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi

View File

@ -1,5 +1,5 @@
# additional include paths necessary to compile the C++ library
SUBDIRS = lib benchmarks tests
SUBDIRS = lib benchmarks tests extras
include $(top_srcdir)/doxygen.inc

View File

@ -48,9 +48,9 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
int maxlat=16;
for(int lat=4;lat<=maxlat;lat+=2){
for(int Ls=1;Ls<=16;Ls*=2){
int maxlat=24;
for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=32;Ls*=2){
std::vector<int> latt_size ({lat*mpi_layout[0],
lat*mpi_layout[1],
@ -124,8 +124,8 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
for(int lat=4;lat<=maxlat;lat+=2){
for(int Ls=1;Ls<=16;Ls*=2){
for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=32;Ls*=2){
std::vector<int> latt_size ({lat,lat,lat,lat});
@ -194,14 +194,14 @@ int main (int argc, char ** argv)
}
Nloop=100;
Nloop=10;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
for(int lat=4;lat<=maxlat;lat+=2){
for(int Ls=1;Ls<=16;Ls*=2){
for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=32;Ls*=2){
std::vector<int> latt_size ({lat*mpi_layout[0],
lat*mpi_layout[1],
@ -281,8 +281,8 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
for(int lat=4;lat<=maxlat;lat+=2){
for(int Ls=1;Ls<=16;Ls*=2){
for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=32;Ls*=2){
std::vector<int> latt_size ({lat*mpi_layout[0],
lat*mpi_layout[1],
@ -324,8 +324,8 @@ int main (int argc, char ** argv)
(void *)&rbuf[mu][0],
recv_from_rank,
bytes);
// Grid.StencilSendToRecvFromComplete(requests);
// requests.resize(0);
Grid.StencilSendToRecvFromComplete(requests);
requests.resize(0);
comm_proc = mpi_layout[mu]-1;

View File

@ -37,27 +37,27 @@ struct scal {
d internal;
};
Gamma::GammaMatrix Gmu [] = {
Gamma::GammaX,
Gamma::GammaY,
Gamma::GammaZ,
Gamma::GammaT
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT
};
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF;
typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::vector<int> latt4 = GridDefaultLatt();
const int Ls=8;
const int Ls=16;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
@ -71,35 +71,66 @@ int main (int argc, char ** argv)
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
LatticeFermion src (FGrid); random(RNG5,src);
#if 0
src = zero;
{
std::vector<int> origin({0,0,0,latt4[2]-1,0});
SpinColourVectorF tmp;
tmp=zero;
tmp()(0)(0)=Complex(-2.0,0.0);
std::cout << " source site 0 " << tmp<<std::endl;
pokeSite(tmp,src,origin);
}
#else
RealD N2 = 1.0/::sqrt(norm2(src));
src = src*N2;
#endif
LatticeFermion result(FGrid); result=zero;
LatticeFermion ref(FGrid); ref=zero;
LatticeFermion tmp(FGrid);
LatticeFermion err(FGrid);
std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
LatticeGaugeField Umu(UGrid);
random(RNG4,Umu);
LatticeGaugeField Umu5d(FGrid);
SU3::HotConfiguration(RNG4,Umu);
std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
#if 0
Umu=1.0;
for(int mu=0;mu<Nd;mu++){
LatticeColourMatrix ttmp(UGrid);
ttmp = PeekIndex<LorentzIndex>(Umu,mu);
// if (mu !=2 ) ttmp = 0;
// ttmp = ttmp* pow(10.0,mu);
PokeIndex<LorentzIndex>(Umu,ttmp,mu);
}
std::cout << GridLogMessage << "Forced to diagonal " << std::endl;
#endif
////////////////////////////////////
// Naive wilson implementation
////////////////////////////////////
// replicate across fifth dimension
LatticeGaugeField Umu5d(FGrid);
std::vector<LatticeColourMatrix> U(4,FGrid);
for(int ss=0;ss<Umu._grid->oSites();ss++){
for(int s=0;s<Ls;s++){
Umu5d._odata[Ls*ss+s] = Umu._odata[ss];
}
}
////////////////////////////////////
// Naive wilson implementation
////////////////////////////////////
std::vector<LatticeColourMatrix> U(4,FGrid);
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
}
std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl;
if (1)
{
@ -121,6 +152,7 @@ int main (int argc, char ** argv)
RealD NP = UGrid->_Nprocessors;
std::cout << GridLogMessage << "Creating action operator " << std::endl;
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
@ -136,10 +168,11 @@ int main (int argc, char ** argv)
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
int ncall =100;
int ncall =1000;
if (1) {
FGrid->Barrier();
Dw.ZeroCounters();
Dw.Dhop(src,result,0);
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
@ -153,12 +186,22 @@ int main (int argc, char ** argv)
double flops=1344*volume*ncall;
std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
// std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
// std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
/*
if(( norm2(err)>1.0e-4) ) {
std::cout << "RESULT\n " << result<<std::endl;
std::cout << "REF \n " << ref <<std::endl;
std::cout << "ERR \n " << err <<std::endl;
FGrid->Barrier();
exit(-1);
}
*/
assert (norm2(err)< 1.0e-4 );
Dw.Report();
}
@ -182,21 +225,13 @@ int main (int argc, char ** argv)
LatticeFermion sresult(sFGrid);
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
for(int x=0;x<latt4[0];x++){
for(int y=0;y<latt4[1];y++){
for(int z=0;z<latt4[2];z++){
for(int t=0;t<latt4[3];t++){
for(int s=0;s<Ls;s++){
std::vector<int> site({s,x,y,z,t});
SpinColourVector tmp;
peekSite(tmp,src,site);
pokeSite(tmp,ssrc,site);
}}}}}
localConvert(src,ssrc);
std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
FGrid->Barrier();
double t0=usecond();
sDw.Dhop(ssrc,sresult,0);
sDw.ZeroCounters();
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
sDw.Dhop(ssrc,sresult,0);
@ -210,46 +245,47 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
// std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
sDw.Report();
if(0){
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
sDw.Dhop(ssrc,sresult,0);
PerformanceCounter Counter(i);
Counter.Start();
sDw.Dhop(ssrc,sresult,0);
Counter.Stop();
Counter.Report();
}
}
std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
RealD sum=0;
for(int x=0;x<latt4[0];x++){
for(int y=0;y<latt4[1];y++){
for(int z=0;z<latt4[2];z++){
for(int t=0;t<latt4[3];t++){
for(int s=0;s<Ls;s++){
std::vector<int> site({s,x,y,z,t});
SpinColourVector normal, simd;
peekSite(normal,result,site);
peekSite(simd,sresult,site);
sum=sum+norm2(normal-simd);
if (norm2(normal-simd) > 1.0e-6 ) {
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl;
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd "<<simd<<std::endl;
}
}}}}}
std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl;
assert (sum< 1.0e-4 );
err=zero;
localConvert(sresult,err);
err = err - ref;
sum = norm2(err);
std::cout<<GridLogMessage<<" difference between normal ref and simd is "<<sum<<std::endl;
if(sum > 1.0e-4 ){
std::cout<< "sD REF\n " <<ref << std::endl;
std::cout<< "sD ERR \n " <<err <<std::endl;
}
// assert(sum < 1.0e-4);
if (1) {
err=zero;
localConvert(sresult,err);
err = err - result;
sum = norm2(err);
std::cout<<GridLogMessage<<" difference between normal result and simd is "<<sum<<std::endl;
if(sum > 1.0e-4 ){
std::cout<< "sD REF\n " <<result << std::endl;
std::cout<< "sD ERR \n " << err <<std::endl;
}
assert(sum < 1.0e-4);
if(1){
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Benchmarking WilsonFermion5D<DomainWallVec5dImplR>::DhopEO "<<std::endl;
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric )
std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)
std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm )
std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
LatticeFermion sr_eo(sFGrid);
LatticeFermion ssrc_e (sFrbGrid);
LatticeFermion ssrc_o (sFrbGrid);
LatticeFermion sr_e (sFrbGrid);
@ -257,33 +293,23 @@ int main (int argc, char ** argv)
pickCheckerboard(Even,ssrc_e,ssrc);
pickCheckerboard(Odd,ssrc_o,ssrc);
setCheckerboard(sr_eo,ssrc_o);
setCheckerboard(sr_eo,ssrc_e);
// setCheckerboard(sr_eo,ssrc_o);
// setCheckerboard(sr_eo,ssrc_e);
sr_e = zero;
sr_o = zero;
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Benchmarking WilsonFermion5D<DomainWallVec5dImplR>::DhopEO "<<std::endl;
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
FGrid->Barrier();
sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
sDw.ZeroCounters();
sDw.stat.init("DhopEO");
// sDw.stat.init("DhopEO");
double t0=usecond();
for (int i = 0; i < ncall; i++) {
sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
}
double t1=usecond();
FGrid->Barrier();
sDw.stat.print();
// sDw.stat.print();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume*ncall)/2;
@ -298,22 +324,26 @@ int main (int argc, char ** argv)
pickCheckerboard(Even,ssrc_e,sresult);
pickCheckerboard(Odd ,ssrc_o,sresult);
ssrc_e = ssrc_e - sr_e;
RealD error = norm2(ssrc_e);
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<< " vec nrm"<<norm2(sr_e) <<std::endl;
ssrc_o = ssrc_o - sr_o;
ssrc_o = ssrc_o - sr_o;
error+= norm2(ssrc_o);
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<< " vec nrm"<<norm2(sr_o) <<std::endl;
if(error>1.0e-4) {
if(( error>1.0e-4) ) {
setCheckerboard(ssrc,ssrc_o);
setCheckerboard(ssrc,ssrc_e);
std::cout<< ssrc << std::endl;
std::cout<< "DIFF\n " <<ssrc << std::endl;
setCheckerboard(ssrc,sr_o);
setCheckerboard(ssrc,sr_e);
std::cout<< "CBRESULT\n " <<ssrc << std::endl;
std::cout<< "RESULT\n " <<sresult<< std::endl;
}
assert(error<1.0e-4);
}
}
if (1)
@ -321,28 +351,33 @@ int main (int argc, char ** argv)
ref = zero;
for(int mu=0;mu<Nd;mu++){
// ref = src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
// ref = src - Gamma(Gamma::Algebra::GammaX)* src ; // 1+gamma_x
tmp = U[mu]*Cshift(src,mu+1,1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
}
tmp =adj(U[mu])*src;
tmp =Cshift(tmp,mu+1,-1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
}
}
ref = -0.5*ref;
}
// dump=1;
Dw.Dhop(src,result,1);
std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl;
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "norm dag result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm dag ref "<< norm2(ref)<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
assert(norm2(err)<1.0e-4);
std::cout<<GridLogMessage << "norm dag diff "<< norm2(err)<<std::endl;
if((norm2(err)>1.0e-4)){
std::cout<< "DAG RESULT\n " <<ref << std::endl;
std::cout<< "DAG sRESULT\n " <<result << std::endl;
std::cout<< "DAG ERR \n " << err <<std::endl;
}
LatticeFermion src_e (FrbGrid);
LatticeFermion src_o (FrbGrid);
LatticeFermion r_e (FrbGrid);
@ -350,13 +385,18 @@ int main (int argc, char ** argv)
LatticeFermion r_eo (FGrid);
std::cout<<GridLogMessage << "Calling Deo and Doe and assert Deo+Doe == Dunprec"<<std::endl;
std::cout<<GridLogMessage << "Calling Deo and Doe and //assert Deo+Doe == Dunprec"<<std::endl;
pickCheckerboard(Even,src_e,src);
pickCheckerboard(Odd,src_o,src);
std::cout<<GridLogMessage << "src_e"<<norm2(src_e)<<std::endl;
std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
// S-direction is INNERMOST and takes no part in the parity.
static int Opt; // these are a temporary hack
static int Comms; // these are a temporary hack
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO "<<std::endl;
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
@ -369,6 +409,7 @@ int main (int argc, char ** argv)
{
Dw.ZeroCounters();
FGrid->Barrier();
Dw.DhopEO(src_o,r_e,DaggerNo);
double t0=usecond();
for(int i=0;i<ncall;i++){
Dw.DhopEO(src_o,r_e,DaggerNo);
@ -396,14 +437,19 @@ int main (int argc, char ** argv)
err = r_eo-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
assert(norm2(err)<1.0e-4);
if((norm2(err)>1.0e-4)){
std::cout<< "Deo RESULT\n " <<r_eo << std::endl;
std::cout<< "Deo REF\n " <<result << std::endl;
std::cout<< "Deo ERR \n " << err <<std::endl;
}
pickCheckerboard(Even,src_e,err);
pickCheckerboard(Odd,src_o,err);
std::cout<<GridLogMessage << "norm diff even "<< norm2(src_e)<<std::endl;
std::cout<<GridLogMessage << "norm diff odd "<< norm2(src_o)<<std::endl;
assert(norm2(src_e)<1.0e-4);
assert(norm2(src_o)<1.0e-4);
//assert(norm2(src_e)<1.0e-4);
//assert(norm2(src_o)<1.0e-4);
Grid_finalize();
}

View File

@ -37,11 +37,11 @@ struct scal {
d internal;
};
Gamma::GammaMatrix Gmu [] = {
Gamma::GammaX,
Gamma::GammaY,
Gamma::GammaZ,
Gamma::GammaT
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT
};
void benchDw(std::vector<int> & L, int Ls, int threads, int report =0 );

View File

@ -66,7 +66,8 @@ int main (int argc, char ** argv)
Vec tsum; tsum = zero;
GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
GridParallelRNG pRNG(&Grid);
pRNG.SeedFixedIntegers(std::vector<int>({56,17,89,101}));
std::vector<double> stop(threads);
Vector<Vec> sum(threads);
@ -77,8 +78,7 @@ int main (int argc, char ** argv)
}
double start=usecond();
PARALLEL_FOR_LOOP
for(int t=0;t<threads;t++){
parallel_for(int t=0;t<threads;t++){
sum[t] = x[t]._odata[0];
for(int i=0;i<Nloop;i++){

View File

@ -65,7 +65,7 @@ int main (int argc, char ** argv)
uint64_t Nloop=NLOOP;
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
@ -100,7 +100,7 @@ int main (int argc, char ** argv)
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
@ -138,7 +138,7 @@ int main (int argc, char ** argv)
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
@ -173,7 +173,7 @@ int main (int argc, char ** argv)
uint64_t Nloop=NLOOP;
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
LatticeVec y(&Grid); //random(pRNG,y);

View File

@ -113,6 +113,36 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Called " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
std::cout<<GridLogMessage << "******************"<<std::endl;
#define BENCH_ZDW(A,in,out) \
zDw.CayleyZeroCounters(); \
zDw. A (in,out); \
FGrid->Barrier(); \
t0=usecond(); \
for(int i=0;i<ncall;i++){ \
zDw. A (in,out); \
} \
t1=usecond(); \
FGrid->Barrier(); \
zDw.CayleyReport(); \
std::cout<<GridLogMessage << "Called ZDw " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
std::cout<<GridLogMessage << "******************"<<std::endl;
#define BENCH_DW_SSC(A,in,out) \
Dw.CayleyZeroCounters(); \
Dw. A (in,out); \
FGrid->Barrier(); \
t0=usecond(); \
for(int i=0;i<ncall;i++){ \
__SSC_START ; \
Dw. A (in,out); \
__SSC_STOP ; \
} \
t1=usecond(); \
FGrid->Barrier(); \
Dw.CayleyReport(); \
std::cout<<GridLogMessage << "Called " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
std::cout<<GridLogMessage << "******************"<<std::endl;
#define BENCH_DW_MEO(A,in,out) \
Dw.CayleyZeroCounters(); \
Dw. A (in,out,0); \
@ -148,9 +178,15 @@ int main (int argc, char ** argv)
LatticeFermion sref(sFGrid);
LatticeFermion result(sFGrid);
std::cout<<GridLogMessage << "Constructing Vec5D Dw "<<std::endl;
DomainWallFermionVec5dR Dw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,M5);
RealD b=1.5;// Scale factor b+c=2, b-c=1
RealD c=0.5;
std::vector<ComplexD> gamma(Ls,std::complex<double>(1.0,0.0));
ZMobiusFermionVec5dR zDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,M5,gamma,b,c);
std::cout<<GridLogMessage << "Calling Dhop "<<std::endl;
FGrid->Barrier();
@ -173,10 +209,13 @@ int main (int argc, char ** argv)
BENCH_DW_MEO(Dhop ,src,result);
BENCH_DW_MEO(DhopEO ,src_o,r_e);
BENCH_DW(Meooe ,src_o,r_e);
BENCH_DW_SSC(Meooe ,src_o,r_e);
BENCH_DW(Mooee ,src_o,r_o);
BENCH_DW(MooeeInv,src_o,r_o);
BENCH_ZDW(Mooee ,src_o,r_o);
BENCH_ZDW(MooeeInv,src_o,r_o);
}
Grid_finalize();

View File

@ -0,0 +1,134 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_staggered.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REALF"<< sizeof(RealF)<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REALD"<< sizeof(RealD)<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REAL"<< sizeof(Real)<<std::endl;
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid);
pRNG.SeedFixedIntegers(seeds);
// pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
typename ImprovedStaggeredFermionR::ImplParams params;
FermionField src (&Grid); random(pRNG,src);
FermionField result(&Grid); result=zero;
FermionField ref(&Grid); ref=zero;
FermionField tmp(&Grid); tmp=zero;
FermionField err(&Grid); tmp=zero;
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
std::vector<LatticeColourMatrix> U(4,&Grid);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
// Only one non-zero (y)
#if 0
Umu=zero;
Complex cone(1.0,0.0);
for(int nn=0;nn<Nd;nn++){
random(pRNG,U[nn]);
if(1) {
if (nn!=2) { U[nn]=zero; std::cout<<GridLogMessage << "zeroing gauge field in dir "<<nn<<std::endl; }
// else { U[nn]= cone;std::cout<<GridLogMessage << "unit gauge field in dir "<<nn<<std::endl; }
else { std::cout<<GridLogMessage << "random gauge field in dir "<<nn<<std::endl; }
}
PokeIndex<LorentzIndex>(Umu,U[nn],nn);
}
#endif
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
}
ref = zero;
/*
{ // Naive wilson implementation
ref = zero;
for(int mu=0;mu<Nd;mu++){
// ref = src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
tmp = U[mu]*Cshift(src,mu,1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
}
tmp =adj(U[mu])*src;
tmp =Cshift(tmp,mu,-1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
}
}
}
ref = -0.5*ref;
*/
RealD mass=0.1;
RealD c1=9.0/8.0;
RealD c2=-1.0/24.0;
RealD u0=1.0;
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0,params);
std::cout<<GridLogMessage << "Calling Ds"<<std::endl;
int ncall=1000;
double t0=usecond();
for(int i=0;i<ncall;i++){
Ds.Dhop(src,result,0);
}
double t1=usecond();
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
std::cout<<GridLogMessage << "Called Ds"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
Grid_finalize();
}

View File

@ -55,7 +55,7 @@ int main (int argc, char ** argv)
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
LatticeColourMatrix z(&Grid);// random(pRNG,z);
LatticeColourMatrix x(&Grid);// random(pRNG,x);
@ -88,7 +88,7 @@ int main (int argc, char ** argv)
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
LatticeColourMatrix z(&Grid); //random(pRNG,z);
LatticeColourMatrix x(&Grid); //random(pRNG,x);
@ -119,7 +119,7 @@ int main (int argc, char ** argv)
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
LatticeColourMatrix z(&Grid); //random(pRNG,z);
LatticeColourMatrix x(&Grid); //random(pRNG,x);
@ -150,7 +150,7 @@ int main (int argc, char ** argv)
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
LatticeColourMatrix z(&Grid); //random(pRNG,z);
LatticeColourMatrix x(&Grid); //random(pRNG,x);

View File

@ -37,11 +37,11 @@ struct scal {
d internal;
};
Gamma::GammaMatrix Gmu [] = {
Gamma::GammaX,
Gamma::GammaY,
Gamma::GammaZ,
Gamma::GammaT
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT
};
bool overlapComms = false;
@ -69,7 +69,7 @@ int main (int argc, char ** argv)
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid);
pRNG.SeedFixedIntegers(seeds);
// pRNG.SeedRandomDevice();
// pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
LatticeFermion src (&Grid); random(pRNG,src);
LatticeFermion result(&Grid); result=zero;
@ -106,7 +106,7 @@ int main (int argc, char ** argv)
{ // Naive wilson implementation
ref = zero;
for(int mu=0;mu<Nd;mu++){
// ref = src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
// ref = src + Gamma(Gamma::Algebra::GammaX)* src ; // 1-gamma_x
tmp = U[mu]*Cshift(src,mu,1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
@ -159,7 +159,7 @@ int main (int argc, char ** argv)
ref = zero;
for(int mu=0;mu<Nd;mu++){
// ref = src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
// ref = src - Gamma(Gamma::Algebra::GammaX)* src ; // 1+gamma_x
tmp = U[mu]*Cshift(src,mu,1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;

View File

@ -30,11 +30,11 @@ struct scal {
d internal;
};
Gamma::GammaMatrix Gmu [] = {
Gamma::GammaX,
Gamma::GammaY,
Gamma::GammaZ,
Gamma::GammaT
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT
};
bool overlapComms = false;

View File

@ -6,7 +6,7 @@ AC_CANONICAL_TARGET
AM_INIT_AUTOMAKE(subdir-objects)
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_SRCDIR([lib/Grid.h])
AC_CONFIG_HEADERS([lib/Config.h])
AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
############### Checks for programs
@ -99,6 +99,13 @@ case ${ac_MKL} in
AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);;
esac
############### HDF5
AC_ARG_WITH([hdf5],
[AS_HELP_STRING([--with-hdf5=prefix],
[try this for a non-standard install prefix of the HDF5 library])],
[AM_CXXFLAGS="-I$with_hdf5/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_hdf5/lib $AM_LDFLAGS"])
############### first-touch
AC_ARG_ENABLE([numa],
[AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
@ -145,6 +152,12 @@ AC_SEARCH_LIBS([fftw_execute], [fftw3],
[AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])]
[have_fftw=true])
AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp],
[AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])]
[have_hdf5=true]
[LIBS="${LIBS} -lhdf5"], [], [-lhdf5])
AM_CONDITIONAL(BUILD_HDF5, [ test "${have_hdf5}X" == "trueX" ])
CXXFLAGS=$CXXFLAGS_CPY
LDFLAGS=$LDFLAGS_CPY
@ -306,9 +319,9 @@ AM_CONDITIONAL(BUILD_COMMS_MPI3L, [ test "${comms_type}X" == "mpi3lX" ] )
AM_CONDITIONAL(BUILD_COMMS_NONE, [ test "${comms_type}X" == "noneX" ])
############### RNG selection
AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\
AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937|sitmo],\
[Select Random Number Generator to be used])],\
[ac_RNG=${enable_rng}],[ac_RNG=ranlux48])
[ac_RNG=${enable_rng}],[ac_RNG=sitmo])
case ${ac_RNG} in
ranlux48)
@ -317,6 +330,9 @@ case ${ac_RNG} in
mt19937)
AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] )
;;
sitmo)
AC_DEFINE([RNG_SITMO],[1],[RNG_SITMO] )
;;
*)
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
;;
@ -381,10 +397,14 @@ AC_CONFIG_FILES(tests/IO/Makefile)
AC_CONFIG_FILES(tests/core/Makefile)
AC_CONFIG_FILES(tests/debug/Makefile)
AC_CONFIG_FILES(tests/forces/Makefile)
AC_CONFIG_FILES(tests/hadrons/Makefile)
AC_CONFIG_FILES(tests/hmc/Makefile)
AC_CONFIG_FILES(tests/solver/Makefile)
AC_CONFIG_FILES(tests/qdpxx/Makefile)
AC_CONFIG_FILES(tests/testu01/Makefile)
AC_CONFIG_FILES(benchmarks/Makefile)
AC_CONFIG_FILES(extras/Makefile)
AC_CONFIG_FILES(extras/Hadrons/Makefile)
AC_OUTPUT
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -407,6 +427,7 @@ RNG choice : ${ac_RNG}
GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
LAPACK : ${ac_LAPACK}
FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
HDF5 : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi`
build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
----- BUILD FLAGS -------------------------------------
CXXFLAGS:

View File

@ -0,0 +1,317 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Application.cc
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Application.hpp>
#include <Grid/Hadrons/GeneticScheduler.hpp>
using namespace Grid;
using namespace QCD;
using namespace Hadrons;
#define BIG_SEP "==============="
#define SEP "---------------"
/******************************************************************************
* Application implementation *
******************************************************************************/
// constructors ////////////////////////////////////////////////////////////////
Application::Application(void)
{
LOG(Message) << "Modules available:" << std::endl;
auto list = ModuleFactory::getInstance().getBuilderList();
for (auto &m: list)
{
LOG(Message) << " " << m << std::endl;
}
auto dim = GridDefaultLatt(), mpi = GridDefaultMpi(), loc(dim);
locVol_ = 1;
for (unsigned int d = 0; d < dim.size(); ++d)
{
loc[d] /= mpi[d];
locVol_ *= loc[d];
}
LOG(Message) << "Global lattice: " << dim << std::endl;
LOG(Message) << "MPI partition : " << mpi << std::endl;
LOG(Message) << "Local lattice : " << loc << std::endl;
}
Application::Application(const Application::GlobalPar &par)
: Application()
{
setPar(par);
}
Application::Application(const std::string parameterFileName)
: Application()
{
parameterFileName_ = parameterFileName;
}
// environment shortcut ////////////////////////////////////////////////////////
Environment & Application::env(void) const
{
return Environment::getInstance();
}
// access //////////////////////////////////////////////////////////////////////
void Application::setPar(const Application::GlobalPar &par)
{
par_ = par;
env().setSeed(strToVec<int>(par_.seed));
}
const Application::GlobalPar & Application::getPar(void)
{
return par_;
}
// execute /////////////////////////////////////////////////////////////////////
void Application::run(void)
{
if (!parameterFileName_.empty() and (env().getNModule() == 0))
{
parseParameterFile(parameterFileName_);
}
if (!scheduled_)
{
schedule();
}
printSchedule();
configLoop();
}
// parse parameter file ////////////////////////////////////////////////////////
class ObjectId: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(ObjectId,
std::string, name,
std::string, type);
};
void Application::parseParameterFile(const std::string parameterFileName)
{
XmlReader reader(parameterFileName);
GlobalPar par;
ObjectId id;
LOG(Message) << "Building application from '" << parameterFileName << "'..." << std::endl;
read(reader, "parameters", par);
setPar(par);
push(reader, "modules");
push(reader, "module");
do
{
read(reader, "id", id);
env().createModule(id.name, id.type, reader);
} while (reader.nextElement("module"));
pop(reader);
pop(reader);
}
void Application::saveParameterFile(const std::string parameterFileName)
{
XmlWriter writer(parameterFileName);
ObjectId id;
const unsigned int nMod = env().getNModule();
LOG(Message) << "Saving application to '" << parameterFileName << "'..." << std::endl;
write(writer, "parameters", getPar());
push(writer, "modules");
for (unsigned int i = 0; i < nMod; ++i)
{
push(writer, "module");
id.name = env().getModuleName(i);
id.type = env().getModule(i)->getRegisteredName();
write(writer, "id", id);
env().getModule(i)->saveParameters(writer, "options");
pop(writer);
}
pop(writer);
pop(writer);
}
// schedule computation ////////////////////////////////////////////////////////
#define MEM_MSG(size)\
sizeString((size)*locVol_) << " (" << sizeString(size) << "/site)"
#define DEFINE_MEMPEAK \
auto memPeak = [this](const std::vector<unsigned int> &program)\
{\
unsigned int memPeak;\
bool msg;\
\
msg = HadronsLogMessage.isActive();\
HadronsLogMessage.Active(false);\
env().dryRun(true);\
memPeak = env().executeProgram(program);\
env().dryRun(false);\
env().freeAll();\
HadronsLogMessage.Active(true);\
\
return memPeak;\
}
void Application::schedule(void)
{
DEFINE_MEMPEAK;
// build module dependency graph
LOG(Message) << "Building module graph..." << std::endl;
auto graph = env().makeModuleGraph();
auto con = graph.getConnectedComponents();
// constrained topological sort using a genetic algorithm
LOG(Message) << "Scheduling computation..." << std::endl;
LOG(Message) << " #module= " << graph.size() << std::endl;
LOG(Message) << " population size= " << par_.genetic.popSize << std::endl;
LOG(Message) << " max. generation= " << par_.genetic.maxGen << std::endl;
LOG(Message) << " max. cst. generation= " << par_.genetic.maxCstGen << std::endl;
LOG(Message) << " mutation rate= " << par_.genetic.mutationRate << std::endl;
unsigned int k = 0, gen, prevPeak, nCstPeak = 0;
std::random_device rd;
GeneticScheduler<unsigned int>::Parameters par;
par.popSize = par_.genetic.popSize;
par.mutationRate = par_.genetic.mutationRate;
par.seed = rd();
memPeak_ = 0;
CartesianCommunicator::BroadcastWorld(0, &(par.seed), sizeof(par.seed));
for (unsigned int i = 0; i < con.size(); ++i)
{
GeneticScheduler<unsigned int> scheduler(con[i], memPeak, par);
gen = 0;
do
{
LOG(Debug) << "Generation " << gen << ":" << std::endl;
scheduler.nextGeneration();
if (gen != 0)
{
if (prevPeak == scheduler.getMinValue())
{
nCstPeak++;
}
else
{
nCstPeak = 0;
}
}
prevPeak = scheduler.getMinValue();
if (gen % 10 == 0)
{
LOG(Iterative) << "Generation " << gen << ": "
<< MEM_MSG(scheduler.getMinValue()) << std::endl;
}
gen++;
} while ((gen < par_.genetic.maxGen)
and (nCstPeak < par_.genetic.maxCstGen));
auto &t = scheduler.getMinSchedule();
if (scheduler.getMinValue() > memPeak_)
{
memPeak_ = scheduler.getMinValue();
}
for (unsigned int j = 0; j < t.size(); ++j)
{
program_.push_back(t[j]);
}
}
scheduled_ = true;
}
void Application::saveSchedule(const std::string filename)
{
TextWriter writer(filename);
std::vector<std::string> program;
if (!scheduled_)
{
HADRON_ERROR("Computation not scheduled");
}
LOG(Message) << "Saving current schedule to '" << filename << "'..."
<< std::endl;
for (auto address: program_)
{
program.push_back(env().getModuleName(address));
}
write(writer, "schedule", program);
}
void Application::loadSchedule(const std::string filename)
{
DEFINE_MEMPEAK;
TextReader reader(filename);
std::vector<std::string> program;
LOG(Message) << "Loading schedule from '" << filename << "'..."
<< std::endl;
read(reader, "schedule", program);
program_.clear();
for (auto &name: program)
{
program_.push_back(env().getModuleAddress(name));
}
scheduled_ = true;
memPeak_ = memPeak(program_);
}
void Application::printSchedule(void)
{
if (!scheduled_)
{
HADRON_ERROR("Computation not scheduled");
}
LOG(Message) << "Schedule (memory peak: " << MEM_MSG(memPeak_) << "):"
<< std::endl;
for (unsigned int i = 0; i < program_.size(); ++i)
{
LOG(Message) << std::setw(4) << i + 1 << ": "
<< env().getModuleName(program_[i]) << std::endl;
}
}
// loop on configurations //////////////////////////////////////////////////////
void Application::configLoop(void)
{
auto range = par_.trajCounter;
for (unsigned int t = range.start; t < range.end; t += range.step)
{
LOG(Message) << BIG_SEP << " Starting measurement for trajectory " << t
<< " " << BIG_SEP << std::endl;
env().setTrajectory(t);
env().executeProgram(program_);
}
LOG(Message) << BIG_SEP << " End of measurement " << BIG_SEP << std::endl;
env().freeAll();
}

View File

@ -0,0 +1,132 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Application.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Application_hpp_
#define Hadrons_Application_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Environment.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
#include <Grid/Hadrons/Modules.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Main program manager *
******************************************************************************/
class Application
{
public:
class TrajRange: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(TrajRange,
unsigned int, start,
unsigned int, end,
unsigned int, step);
};
class GeneticPar: Serializable
{
public:
GeneticPar(void):
popSize{20}, maxGen{1000}, maxCstGen{100}, mutationRate{.1} {};
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(GeneticPar,
unsigned int, popSize,
unsigned int, maxGen,
unsigned int, maxCstGen,
double , mutationRate);
};
class GlobalPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar,
TrajRange, trajCounter,
GeneticPar, genetic,
std::string, seed);
};
public:
// constructors
Application(void);
Application(const GlobalPar &par);
Application(const std::string parameterFileName);
// destructor
virtual ~Application(void) = default;
// access
void setPar(const GlobalPar &par);
const GlobalPar & getPar(void);
// module creation
template <typename M>
void createModule(const std::string name);
template <typename M>
void createModule(const std::string name, const typename M::Par &par);
// execute
void run(void);
// XML parameter file I/O
void parseParameterFile(const std::string parameterFileName);
void saveParameterFile(const std::string parameterFileName);
// schedule computation
void schedule(void);
void saveSchedule(const std::string filename);
void loadSchedule(const std::string filename);
void printSchedule(void);
// loop on configurations
void configLoop(void);
private:
// environment shortcut
Environment & env(void) const;
private:
long unsigned int locVol_;
std::string parameterFileName_{""};
GlobalPar par_;
std::vector<unsigned int> program_;
Environment::Size memPeak_;
bool scheduled_{false};
};
/******************************************************************************
* Application template implementation *
******************************************************************************/
// module creation /////////////////////////////////////////////////////////////
template <typename M>
void Application::createModule(const std::string name)
{
env().createModule<M>(name);
}
template <typename M>
void Application::createModule(const std::string name,
const typename M::Par &par)
{
env().createModule<M>(name, par);
}
END_HADRONS_NAMESPACE
#endif // Hadrons_Application_hpp_

View File

@ -0,0 +1,743 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Environment.cc
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Environment.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
using namespace Grid;
using namespace QCD;
using namespace Hadrons;
/******************************************************************************
* Environment implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
Environment::Environment(void)
{
nd_ = GridDefaultLatt().size();
grid4d_.reset(SpaceTimeGrid::makeFourDimGrid(
GridDefaultLatt(), GridDefaultSimd(nd_, vComplex::Nsimd()),
GridDefaultMpi()));
gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get()));
auto loc = getGrid()->LocalDimensions();
locVol_ = 1;
for (unsigned int d = 0; d < loc.size(); ++d)
{
locVol_ *= loc[d];
}
rng4d_.reset(new GridParallelRNG(grid4d_.get()));
}
// dry run /////////////////////////////////////////////////////////////////////
void Environment::dryRun(const bool isDry)
{
dryRun_ = isDry;
}
bool Environment::isDryRun(void) const
{
return dryRun_;
}
// trajectory number ///////////////////////////////////////////////////////////
void Environment::setTrajectory(const unsigned int traj)
{
traj_ = traj;
}
unsigned int Environment::getTrajectory(void) const
{
return traj_;
}
// grids ///////////////////////////////////////////////////////////////////////
void Environment::createGrid(const unsigned int Ls)
{
if (grid5d_.find(Ls) == grid5d_.end())
{
auto g = getGrid();
grid5d_[Ls].reset(SpaceTimeGrid::makeFiveDimGrid(Ls, g));
gridRb5d_[Ls].reset(SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, g));
}
}
GridCartesian * Environment::getGrid(const unsigned int Ls) const
{
try
{
if (Ls == 1)
{
return grid4d_.get();
}
else
{
return grid5d_.at(Ls).get();
}
}
catch(std::out_of_range &)
{
HADRON_ERROR("no grid with Ls= " << Ls);
}
}
GridRedBlackCartesian * Environment::getRbGrid(const unsigned int Ls) const
{
try
{
if (Ls == 1)
{
return gridRb4d_.get();
}
else
{
return gridRb5d_.at(Ls).get();
}
}
catch(std::out_of_range &)
{
HADRON_ERROR("no red-black 5D grid with Ls= " << Ls);
}
}
unsigned int Environment::getNd(void) const
{
return nd_;
}
// random number generator /////////////////////////////////////////////////////
void Environment::setSeed(const std::vector<int> &seed)
{
rng4d_->SeedFixedIntegers(seed);
}
GridParallelRNG * Environment::get4dRng(void) const
{
return rng4d_.get();
}
// module management ///////////////////////////////////////////////////////////
void Environment::pushModule(Environment::ModPt &pt)
{
std::string name = pt->getName();
if (!hasModule(name))
{
std::vector<unsigned int> inputAddress;
unsigned int address;
ModuleInfo m;
m.data = std::move(pt);
m.type = typeIdPt(*m.data.get());
m.name = name;
auto input = m.data->getInput();
for (auto &in: input)
{
if (!hasObject(in))
{
addObject(in , -1);
}
m.input.push_back(objectAddress_[in]);
}
auto output = m.data->getOutput();
module_.push_back(std::move(m));
address = static_cast<unsigned int>(module_.size() - 1);
moduleAddress_[name] = address;
for (auto &out: output)
{
if (!hasObject(out))
{
addObject(out, address);
}
else
{
if (object_[objectAddress_[out]].module < 0)
{
object_[objectAddress_[out]].module = address;
}
else
{
HADRON_ERROR("object '" + out
+ "' is already produced by module '"
+ module_[object_[getObjectAddress(out)].module].name
+ "' (while pushing module '" + name + "')");
}
}
}
}
else
{
HADRON_ERROR("module '" + name + "' already exists");
}
}
unsigned int Environment::getNModule(void) const
{
return module_.size();
}
void Environment::createModule(const std::string name, const std::string type,
XmlReader &reader)
{
auto &factory = ModuleFactory::getInstance();
auto pt = factory.create(type, name);
pt->parseParameters(reader, "options");
pushModule(pt);
}
ModuleBase * Environment::getModule(const unsigned int address) const
{
if (hasModule(address))
{
return module_[address].data.get();
}
else
{
HADRON_ERROR("no module with address " + std::to_string(address));
}
}
ModuleBase * Environment::getModule(const std::string name) const
{
return getModule(getModuleAddress(name));
}
unsigned int Environment::getModuleAddress(const std::string name) const
{
if (hasModule(name))
{
return moduleAddress_.at(name);
}
else
{
HADRON_ERROR("no module with name '" + name + "'");
}
}
std::string Environment::getModuleName(const unsigned int address) const
{
if (hasModule(address))
{
return module_[address].name;
}
else
{
HADRON_ERROR("no module with address " + std::to_string(address));
}
}
std::string Environment::getModuleType(const unsigned int address) const
{
if (hasModule(address))
{
return typeName(module_[address].type);
}
else
{
HADRON_ERROR("no module with address " + std::to_string(address));
}
}
std::string Environment::getModuleType(const std::string name) const
{
return getModuleType(getModuleAddress(name));
}
bool Environment::hasModule(const unsigned int address) const
{
return (address < module_.size());
}
bool Environment::hasModule(const std::string name) const
{
return (moduleAddress_.find(name) != moduleAddress_.end());
}
Graph<unsigned int> Environment::makeModuleGraph(void) const
{
Graph<unsigned int> moduleGraph;
for (unsigned int i = 0; i < module_.size(); ++i)
{
moduleGraph.addVertex(i);
for (auto &j: module_[i].input)
{
moduleGraph.addEdge(object_[j].module, i);
}
}
return moduleGraph;
}
#define BIG_SEP "==============="
#define SEP "---------------"
#define MEM_MSG(size)\
sizeString((size)*locVol_) << " (" << sizeString(size) << "/site)"
Environment::Size
Environment::executeProgram(const std::vector<unsigned int> &p)
{
Size memPeak = 0, sizeBefore, sizeAfter;
std::vector<std::set<unsigned int>> freeProg;
bool continueCollect, nothingFreed;
// build garbage collection schedule
freeProg.resize(p.size());
for (unsigned int i = 0; i < object_.size(); ++i)
{
auto pred = [i, this](const unsigned int j)
{
auto &in = module_[j].input;
auto it = std::find(in.begin(), in.end(), i);
return (it != in.end()) or (j == object_[i].module);
};
auto it = std::find_if(p.rbegin(), p.rend(), pred);
if (it != p.rend())
{
freeProg[p.rend() - it - 1].insert(i);
}
}
// program execution
for (unsigned int i = 0; i < p.size(); ++i)
{
// execute module
if (!isDryRun())
{
LOG(Message) << SEP << " Measurement step " << i+1 << "/"
<< p.size() << " (module '" << module_[p[i]].name
<< "') " << SEP << std::endl;
}
(*module_[p[i]].data)();
sizeBefore = getTotalSize();
// print used memory after execution
if (!isDryRun())
{
LOG(Message) << "Allocated objects: " << MEM_MSG(sizeBefore)
<< std::endl;
}
if (sizeBefore > memPeak)
{
memPeak = sizeBefore;
}
// garbage collection for step i
if (!isDryRun())
{
LOG(Message) << "Garbage collection..." << std::endl;
}
nothingFreed = true;
do
{
continueCollect = false;
auto toFree = freeProg[i];
for (auto &j: toFree)
{
// continue garbage collection while there are still
// objects without owners
continueCollect = continueCollect or !hasOwners(j);
if(freeObject(j))
{
// if an object has been freed, remove it from
// the garbage collection schedule
freeProg[i].erase(j);
nothingFreed = false;
}
}
} while (continueCollect);
// any remaining objects in step i garbage collection schedule
// is scheduled for step i + 1
if (i + 1 < p.size())
{
for (auto &j: freeProg[i])
{
freeProg[i + 1].insert(j);
}
}
// print used memory after garbage collection if necessary
if (!isDryRun())
{
sizeAfter = getTotalSize();
if (sizeBefore != sizeAfter)
{
LOG(Message) << "Allocated objects: " << MEM_MSG(sizeAfter)
<< std::endl;
}
else
{
LOG(Message) << "Nothing to free" << std::endl;
}
}
}
return memPeak;
}
Environment::Size Environment::executeProgram(const std::vector<std::string> &p)
{
std::vector<unsigned int> pAddress;
for (auto &n: p)
{
pAddress.push_back(getModuleAddress(n));
}
return executeProgram(pAddress);
}
// general memory management ///////////////////////////////////////////////////
void Environment::addObject(const std::string name, const int moduleAddress)
{
if (!hasObject(name))
{
ObjInfo info;
info.name = name;
info.module = moduleAddress;
object_.push_back(std::move(info));
objectAddress_[name] = static_cast<unsigned int>(object_.size() - 1);
}
else
{
HADRON_ERROR("object '" + name + "' already exists");
}
}
void Environment::registerObject(const unsigned int address,
const unsigned int size, const unsigned int Ls)
{
if (!hasRegisteredObject(address))
{
if (hasObject(address))
{
object_[address].size = size;
object_[address].Ls = Ls;
object_[address].isRegistered = true;
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
}
}
else
{
HADRON_ERROR("object with address " + std::to_string(address)
+ " already registered");
}
}
void Environment::registerObject(const std::string name,
const unsigned int size, const unsigned int Ls)
{
if (!hasObject(name))
{
addObject(name);
}
registerObject(getObjectAddress(name), size, Ls);
}
unsigned int Environment::getObjectAddress(const std::string name) const
{
if (hasObject(name))
{
return objectAddress_.at(name);
}
else
{
HADRON_ERROR("no object with name '" + name + "'");
}
}
std::string Environment::getObjectName(const unsigned int address) const
{
if (hasObject(address))
{
return object_[address].name;
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
}
}
std::string Environment::getObjectType(const unsigned int address) const
{
if (hasRegisteredObject(address))
{
return typeName(object_[address].type);
}
else if (hasObject(address))
{
HADRON_ERROR("object with address " + std::to_string(address)
+ " exists but is not registered");
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
}
}
std::string Environment::getObjectType(const std::string name) const
{
return getObjectType(getObjectAddress(name));
}
Environment::Size Environment::getObjectSize(const unsigned int address) const
{
if (hasRegisteredObject(address))
{
return object_[address].size;
}
else if (hasObject(address))
{
HADRON_ERROR("object with address " + std::to_string(address)
+ " exists but is not registered");
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
}
}
Environment::Size Environment::getObjectSize(const std::string name) const
{
return getObjectSize(getObjectAddress(name));
}
unsigned int Environment::getObjectLs(const unsigned int address) const
{
if (hasRegisteredObject(address))
{
return object_[address].Ls;
}
else if (hasObject(address))
{
HADRON_ERROR("object with address " + std::to_string(address)
+ " exists but is not registered");
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
}
}
unsigned int Environment::getObjectLs(const std::string name) const
{
return getObjectLs(getObjectAddress(name));
}
bool Environment::hasObject(const unsigned int address) const
{
return (address < object_.size());
}
bool Environment::hasObject(const std::string name) const
{
auto it = objectAddress_.find(name);
return ((it != objectAddress_.end()) and hasObject(it->second));
}
bool Environment::hasRegisteredObject(const unsigned int address) const
{
if (hasObject(address))
{
return object_[address].isRegistered;
}
else
{
return false;
}
}
bool Environment::hasRegisteredObject(const std::string name) const
{
if (hasObject(name))
{
return hasRegisteredObject(getObjectAddress(name));
}
else
{
return false;
}
}
bool Environment::hasCreatedObject(const unsigned int address) const
{
if (hasObject(address))
{
return (object_[address].data != nullptr);
}
else
{
return false;
}
}
bool Environment::hasCreatedObject(const std::string name) const
{
if (hasObject(name))
{
return hasCreatedObject(getObjectAddress(name));
}
else
{
return false;
}
}
bool Environment::isObject5d(const unsigned int address) const
{
return (getObjectLs(address) > 1);
}
bool Environment::isObject5d(const std::string name) const
{
return (getObjectLs(name) > 1);
}
Environment::Size Environment::getTotalSize(void) const
{
Environment::Size size = 0;
for (auto &o: object_)
{
if (o.isRegistered)
{
size += o.size;
}
}
return size;
}
void Environment::addOwnership(const unsigned int owner,
const unsigned int property)
{
if (hasObject(property))
{
object_[property].owners.insert(owner);
}
else
{
HADRON_ERROR("no object with address " + std::to_string(property));
}
if (hasObject(owner))
{
object_[owner].properties.insert(property);
}
else
{
HADRON_ERROR("no object with address " + std::to_string(owner));
}
}
void Environment::addOwnership(const std::string owner,
const std::string property)
{
addOwnership(getObjectAddress(owner), getObjectAddress(property));
}
bool Environment::hasOwners(const unsigned int address) const
{
if (hasObject(address))
{
return (!object_[address].owners.empty());
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
}
}
bool Environment::hasOwners(const std::string name) const
{
return hasOwners(getObjectAddress(name));
}
bool Environment::freeObject(const unsigned int address)
{
if (!hasOwners(address))
{
if (!isDryRun() and object_[address].isRegistered)
{
LOG(Message) << "Destroying object '" << object_[address].name
<< "'" << std::endl;
}
for (auto &p: object_[address].properties)
{
object_[p].owners.erase(address);
}
object_[address].size = 0;
object_[address].Ls = 0;
object_[address].isRegistered = false;
object_[address].type = nullptr;
object_[address].owners.clear();
object_[address].properties.clear();
object_[address].data.reset(nullptr);
return true;
}
else
{
return false;
}
}
bool Environment::freeObject(const std::string name)
{
return freeObject(getObjectAddress(name));
}
void Environment::freeAll(void)
{
for (unsigned int i = 0; i < object_.size(); ++i)
{
freeObject(i);
}
}
void Environment::printContent(void)
{
LOG(Message) << "Modules: " << std::endl;
for (unsigned int i = 0; i < module_.size(); ++i)
{
LOG(Message) << std::setw(4) << i << ": "
<< getModuleName(i) << std::endl;
}
LOG(Message) << "Objects: " << std::endl;
for (unsigned int i = 0; i < object_.size(); ++i)
{
LOG(Message) << std::setw(4) << i << ": "
<< getObjectName(i) << std::endl;
}
}

View File

@ -0,0 +1,385 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Environment.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Environment_hpp_
#define Hadrons_Environment_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Graph.hpp>
#ifndef SITE_SIZE_TYPE
#define SITE_SIZE_TYPE unsigned int
#endif
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Global environment *
******************************************************************************/
// forward declaration of Module
class ModuleBase;
class Object
{
public:
Object(void) = default;
virtual ~Object(void) = default;
};
template <typename T>
class Holder: public Object
{
public:
Holder(void) = default;
Holder(T *pt);
virtual ~Holder(void) = default;
T & get(void) const;
T * getPt(void) const;
void reset(T *pt);
private:
std::unique_ptr<T> objPt_{nullptr};
};
class Environment
{
SINGLETON(Environment);
public:
typedef SITE_SIZE_TYPE Size;
typedef std::unique_ptr<ModuleBase> ModPt;
typedef std::unique_ptr<GridCartesian> GridPt;
typedef std::unique_ptr<GridRedBlackCartesian> GridRbPt;
typedef std::unique_ptr<GridParallelRNG> RngPt;
typedef std::unique_ptr<LatticeBase> LatticePt;
private:
struct ModuleInfo
{
const std::type_info *type{nullptr};
std::string name;
ModPt data{nullptr};
std::vector<unsigned int> input;
};
struct ObjInfo
{
Size size{0};
unsigned int Ls{0};
bool isRegistered{false};
const std::type_info *type{nullptr};
std::string name;
int module{-1};
std::set<unsigned int> owners, properties;
std::unique_ptr<Object> data{nullptr};
};
public:
// dry run
void dryRun(const bool isDry);
bool isDryRun(void) const;
// trajectory number
void setTrajectory(const unsigned int traj);
unsigned int getTrajectory(void) const;
// grids
void createGrid(const unsigned int Ls);
GridCartesian * getGrid(const unsigned int Ls = 1) const;
GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const;
unsigned int getNd(void) const;
// random number generator
void setSeed(const std::vector<int> &seed);
GridParallelRNG * get4dRng(void) const;
// module management
void pushModule(ModPt &pt);
template <typename M>
void createModule(const std::string name);
template <typename M>
void createModule(const std::string name,
const typename M::Par &par);
void createModule(const std::string name,
const std::string type,
XmlReader &reader);
unsigned int getNModule(void) const;
ModuleBase * getModule(const unsigned int address) const;
ModuleBase * getModule(const std::string name) const;
template <typename M>
M * getModule(const unsigned int address) const;
template <typename M>
M * getModule(const std::string name) const;
unsigned int getModuleAddress(const std::string name) const;
std::string getModuleName(const unsigned int address) const;
std::string getModuleType(const unsigned int address) const;
std::string getModuleType(const std::string name) const;
bool hasModule(const unsigned int address) const;
bool hasModule(const std::string name) const;
Graph<unsigned int> makeModuleGraph(void) const;
Size executeProgram(const std::vector<unsigned int> &p);
Size executeProgram(const std::vector<std::string> &p);
// general memory management
void addObject(const std::string name,
const int moduleAddress = -1);
void registerObject(const unsigned int address,
const unsigned int size,
const unsigned int Ls = 1);
void registerObject(const std::string name,
const unsigned int size,
const unsigned int Ls = 1);
template <typename T>
unsigned int lattice4dSize(void) const;
template <typename T>
void registerLattice(const unsigned int address,
const unsigned int Ls = 1);
template <typename T>
void registerLattice(const std::string name,
const unsigned int Ls = 1);
template <typename T>
void setObject(const unsigned int address, T *object);
template <typename T>
void setObject(const std::string name, T *object);
template <typename T>
T * getObject(const unsigned int address) const;
template <typename T>
T * getObject(const std::string name) const;
template <typename T>
T * createLattice(const unsigned int address);
template <typename T>
T * createLattice(const std::string name);
unsigned int getObjectAddress(const std::string name) const;
std::string getObjectName(const unsigned int address) const;
std::string getObjectType(const unsigned int address) const;
std::string getObjectType(const std::string name) const;
Size getObjectSize(const unsigned int address) const;
Size getObjectSize(const std::string name) const;
unsigned int getObjectLs(const unsigned int address) const;
unsigned int getObjectLs(const std::string name) const;
bool hasObject(const unsigned int address) const;
bool hasObject(const std::string name) const;
bool hasRegisteredObject(const unsigned int address) const;
bool hasRegisteredObject(const std::string name) const;
bool hasCreatedObject(const unsigned int address) const;
bool hasCreatedObject(const std::string name) const;
bool isObject5d(const unsigned int address) const;
bool isObject5d(const std::string name) const;
Environment::Size getTotalSize(void) const;
void addOwnership(const unsigned int owner,
const unsigned int property);
void addOwnership(const std::string owner,
const std::string property);
bool hasOwners(const unsigned int address) const;
bool hasOwners(const std::string name) const;
bool freeObject(const unsigned int address);
bool freeObject(const std::string name);
void freeAll(void);
void printContent(void);
private:
// general
bool dryRun_{false};
unsigned int traj_, locVol_;
// grids
GridPt grid4d_;
std::map<unsigned int, GridPt> grid5d_;
GridRbPt gridRb4d_;
std::map<unsigned int, GridRbPt> gridRb5d_;
unsigned int nd_;
// random number generator
RngPt rng4d_;
// module and related maps
std::vector<ModuleInfo> module_;
std::map<std::string, unsigned int> moduleAddress_;
// lattice store
std::map<unsigned int, LatticePt> lattice_;
// object store
std::vector<ObjInfo> object_;
std::map<std::string, unsigned int> objectAddress_;
};
/******************************************************************************
* Holder template implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename T>
Holder<T>::Holder(T *pt)
: objPt_(pt)
{}
// access //////////////////////////////////////////////////////////////////////
template <typename T>
T & Holder<T>::get(void) const
{
return &objPt_.get();
}
template <typename T>
T * Holder<T>::getPt(void) const
{
return objPt_.get();
}
template <typename T>
void Holder<T>::reset(T *pt)
{
objPt_.reset(pt);
}
/******************************************************************************
* Environment template implementation *
******************************************************************************/
// module management ///////////////////////////////////////////////////////////
template <typename M>
void Environment::createModule(const std::string name)
{
ModPt pt(new M(name));
pushModule(pt);
}
template <typename M>
void Environment::createModule(const std::string name,
const typename M::Par &par)
{
ModPt pt(new M(name));
static_cast<M *>(pt.get())->setPar(par);
pushModule(pt);
}
template <typename M>
M * Environment::getModule(const unsigned int address) const
{
if (auto *pt = dynamic_cast<M *>(getModule(address)))
{
return pt;
}
else
{
HADRON_ERROR("module '" + module_[address].name
+ "' does not have type " + typeid(M).name()
+ "(object type: " + getModuleType(address) + ")");
}
}
template <typename M>
M * Environment::getModule(const std::string name) const
{
return getModule<M>(getModuleAddress(name));
}
template <typename T>
unsigned int Environment::lattice4dSize(void) const
{
return sizeof(typename T::vector_object)/getGrid()->Nsimd();
}
template <typename T>
void Environment::registerLattice(const unsigned int address,
const unsigned int Ls)
{
createGrid(Ls);
registerObject(address, Ls*lattice4dSize<T>(), Ls);
}
template <typename T>
void Environment::registerLattice(const std::string name, const unsigned int Ls)
{
createGrid(Ls);
registerObject(name, Ls*lattice4dSize<T>(), Ls);
}
template <typename T>
void Environment::setObject(const unsigned int address, T *object)
{
if (hasRegisteredObject(address))
{
object_[address].data.reset(new Holder<T>(object));
object_[address].type = &typeid(T);
}
else if (hasObject(address))
{
HADRON_ERROR("object with address " + std::to_string(address) +
" exists but is not registered");
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
}
}
template <typename T>
void Environment::setObject(const std::string name, T *object)
{
setObject(getObjectAddress(name), object);
}
template <typename T>
T * Environment::getObject(const unsigned int address) const
{
if (hasRegisteredObject(address))
{
if (auto h = dynamic_cast<Holder<T> *>(object_[address].data.get()))
{
return h->getPt();
}
else
{
HADRON_ERROR("object with address " + std::to_string(address) +
" does not have type '" + typeid(T).name() +
"' (has type '" + getObjectType(address) + "')");
}
}
else if (hasObject(address))
{
HADRON_ERROR("object with address " + std::to_string(address) +
" exists but is not registered");
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
}
}
template <typename T>
T * Environment::getObject(const std::string name) const
{
return getObject<T>(getObjectAddress(name));
}
template <typename T>
T * Environment::createLattice(const unsigned int address)
{
GridCartesian *g = getGrid(getObjectLs(address));
setObject(address, new T(g));
return getObject<T>(address);
}
template <typename T>
T * Environment::createLattice(const std::string name)
{
return createLattice<T>(getObjectAddress(name));
}
END_HADRONS_NAMESPACE
#endif // Hadrons_Environment_hpp_

106
extras/Hadrons/Factory.hpp Normal file
View File

@ -0,0 +1,106 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Factory.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Factory_hpp_
#define Hadrons_Factory_hpp_
#include <Grid/Hadrons/Global.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* abstract factory class *
******************************************************************************/
template <typename T>
class Factory
{
public:
typedef std::function<std::unique_ptr<T>(const std::string)> Func;
public:
// constructor
Factory(void) = default;
// destructor
virtual ~Factory(void) = default;
// registration
void registerBuilder(const std::string type, const Func &f);
// get builder list
std::vector<std::string> getBuilderList(void) const;
// factory
std::unique_ptr<T> create(const std::string type,
const std::string name) const;
private:
std::map<std::string, Func> builder_;
};
/******************************************************************************
* template implementation *
******************************************************************************/
// registration ////////////////////////////////////////////////////////////////
template <typename T>
void Factory<T>::registerBuilder(const std::string type, const Func &f)
{
builder_[type] = f;
}
// get module list /////////////////////////////////////////////////////////////
template <typename T>
std::vector<std::string> Factory<T>::getBuilderList(void) const
{
std::vector<std::string> list;
for (auto &b: builder_)
{
list.push_back(b.first);
}
return list;
}
// factory /////////////////////////////////////////////////////////////////////
template <typename T>
std::unique_ptr<T> Factory<T>::create(const std::string type,
const std::string name) const
{
Func func;
try
{
func = builder_.at(type);
}
catch (std::out_of_range &)
{
HADRON_ERROR("object of type '" + type + "' unknown");
}
return func(name);
}
END_HADRONS_NAMESPACE
#endif // Hadrons_Factory_hpp_

View File

@ -0,0 +1,329 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/GeneticScheduler.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_GeneticScheduler_hpp_
#define Hadrons_GeneticScheduler_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Graph.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Scheduler based on a genetic algorithm *
******************************************************************************/
template <typename T>
class GeneticScheduler
{
public:
typedef std::vector<T> Gene;
typedef std::pair<Gene *, Gene *> GenePair;
typedef std::function<int(const Gene &)> ObjFunc;
struct Parameters
{
double mutationRate;
unsigned int popSize, seed;
};
public:
// constructor
GeneticScheduler(Graph<T> &graph, const ObjFunc &func,
const Parameters &par);
// destructor
virtual ~GeneticScheduler(void) = default;
// access
const Gene & getMinSchedule(void);
int getMinValue(void);
// breed a new generation
void nextGeneration(void);
// heuristic benchmarks
void benchmarkCrossover(const unsigned int nIt);
// print population
friend std::ostream & operator<<(std::ostream &out,
const GeneticScheduler<T> &s)
{
out << "[";
for (auto &p: s.population_)
{
out << p.first << ", ";
}
out << "\b\b]";
return out;
}
private:
// evolution steps
void initPopulation(void);
void doCrossover(void);
void doMutation(void);
// genetic operators
GenePair selectPair(void);
void crossover(Gene &c1, Gene &c2, const Gene &p1, const Gene &p2);
void mutation(Gene &m, const Gene &c);
private:
Graph<T> &graph_;
const ObjFunc &func_;
const Parameters par_;
std::multimap<int, Gene> population_;
std::mt19937 gen_;
};
/******************************************************************************
* template implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename T>
GeneticScheduler<T>::GeneticScheduler(Graph<T> &graph, const ObjFunc &func,
const Parameters &par)
: graph_(graph)
, func_(func)
, par_(par)
{
gen_.seed(par_.seed);
}
// access //////////////////////////////////////////////////////////////////////
template <typename T>
const typename GeneticScheduler<T>::Gene &
GeneticScheduler<T>::getMinSchedule(void)
{
return population_.begin()->second;
}
template <typename T>
int GeneticScheduler<T>::getMinValue(void)
{
return population_.begin()->first;
}
// breed a new generation //////////////////////////////////////////////////////
template <typename T>
void GeneticScheduler<T>::nextGeneration(void)
{
// random initialization of the population if necessary
if (population_.size() != par_.popSize)
{
initPopulation();
}
LOG(Debug) << "Starting population:\n" << *this << std::endl;
// random mutations
//PARALLEL_FOR_LOOP
for (unsigned int i = 0; i < par_.popSize; ++i)
{
doMutation();
}
LOG(Debug) << "After mutations:\n" << *this << std::endl;
// mating
//PARALLEL_FOR_LOOP
for (unsigned int i = 0; i < par_.popSize/2; ++i)
{
doCrossover();
}
LOG(Debug) << "After mating:\n" << *this << std::endl;
// grim reaper
auto it = population_.begin();
std::advance(it, par_.popSize);
population_.erase(it, population_.end());
LOG(Debug) << "After grim reaper:\n" << *this << std::endl;
}
// evolution steps /////////////////////////////////////////////////////////////
template <typename T>
void GeneticScheduler<T>::initPopulation(void)
{
population_.clear();
for (unsigned int i = 0; i < par_.popSize; ++i)
{
auto p = graph_.topoSort(gen_);
population_.insert(std::make_pair(func_(p), p));
}
}
template <typename T>
void GeneticScheduler<T>::doCrossover(void)
{
auto p = selectPair();
Gene &p1 = *(p.first), &p2 = *(p.second);
Gene c1, c2;
crossover(c1, c2, p1, p2);
PARALLEL_CRITICAL
{
population_.insert(std::make_pair(func_(c1), c1));
population_.insert(std::make_pair(func_(c2), c2));
}
}
template <typename T>
void GeneticScheduler<T>::doMutation(void)
{
std::uniform_real_distribution<double> mdis(0., 1.);
std::uniform_int_distribution<unsigned int> pdis(0, population_.size() - 1);
if (mdis(gen_) < par_.mutationRate)
{
Gene m;
auto it = population_.begin();
std::advance(it, pdis(gen_));
mutation(m, it->second);
PARALLEL_CRITICAL
{
population_.insert(std::make_pair(func_(m), m));
}
}
}
// genetic operators ///////////////////////////////////////////////////////////
template <typename T>
typename GeneticScheduler<T>::GenePair GeneticScheduler<T>::selectPair(void)
{
std::vector<double> prob;
unsigned int ind;
Gene *p1, *p2;
for (auto &c: population_)
{
prob.push_back(1./c.first);
}
do
{
double probCpy;
std::discrete_distribution<unsigned int> dis1(prob.begin(), prob.end());
auto rIt = population_.begin();
ind = dis1(gen_);
std::advance(rIt, ind);
p1 = &(rIt->second);
probCpy = prob[ind];
prob[ind] = 0.;
std::discrete_distribution<unsigned int> dis2(prob.begin(), prob.end());
rIt = population_.begin();
std::advance(rIt, dis2(gen_));
p2 = &(rIt->second);
prob[ind] = probCpy;
} while (p1 == p2);
return std::make_pair(p1, p2);
}
template <typename T>
void GeneticScheduler<T>::crossover(Gene &c1, Gene &c2, const Gene &p1,
const Gene &p2)
{
Gene buf;
std::uniform_int_distribution<unsigned int> dis(0, p1.size() - 1);
unsigned int cut = dis(gen_);
c1.clear();
buf = p2;
for (unsigned int i = 0; i < cut; ++i)
{
c1.push_back(p1[i]);
buf.erase(std::find(buf.begin(), buf.end(), p1[i]));
}
for (unsigned int i = 0; i < buf.size(); ++i)
{
c1.push_back(buf[i]);
}
c2.clear();
buf = p2;
for (unsigned int i = cut; i < p1.size(); ++i)
{
buf.erase(std::find(buf.begin(), buf.end(), p1[i]));
}
for (unsigned int i = 0; i < buf.size(); ++i)
{
c2.push_back(buf[i]);
}
for (unsigned int i = cut; i < p1.size(); ++i)
{
c2.push_back(p1[i]);
}
}
template <typename T>
void GeneticScheduler<T>::mutation(Gene &m, const Gene &c)
{
Gene buf;
std::uniform_int_distribution<unsigned int> dis(0, c.size() - 1);
unsigned int cut = dis(gen_);
Graph<T> g1 = graph_, g2 = graph_;
for (unsigned int i = 0; i < cut; ++i)
{
g1.removeVertex(c[i]);
}
for (unsigned int i = cut; i < c.size(); ++i)
{
g2.removeVertex(c[i]);
}
if (g1.size() > 0)
{
buf = g1.topoSort(gen_);
}
if (g2.size() > 0)
{
m = g2.topoSort(gen_);
}
for (unsigned int i = cut; i < c.size(); ++i)
{
m.push_back(buf[i - cut]);
}
}
template <typename T>
void GeneticScheduler<T>::benchmarkCrossover(const unsigned int nIt)
{
Gene p1, p2, c1, c2;
double neg = 0., eq = 0., pos = 0., total;
int improvement;
LOG(Message) << "Benchmarking crossover..." << std::endl;
for (unsigned int i = 0; i < nIt; ++i)
{
p1 = graph_.topoSort(gen_);
p2 = graph_.topoSort(gen_);
crossover(c1, c2, p1, p2);
improvement = (func_(c1) + func_(c2) - func_(p1) - func_(p2))/2;
if (improvement < 0) neg++; else if (improvement == 0) eq++; else pos++;
}
total = neg + eq + pos;
LOG(Message) << " -: " << neg/total << " =: " << eq/total
<< " +: " << pos/total << std::endl;
}
END_HADRONS_NAMESPACE
#endif // Hadrons_GeneticScheduler_hpp_

82
extras/Hadrons/Global.cc Normal file
View File

@ -0,0 +1,82 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Global.cc
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Global.hpp>
using namespace Grid;
using namespace QCD;
using namespace Hadrons;
HadronsLogger Hadrons::HadronsLogError(1,"Error");
HadronsLogger Hadrons::HadronsLogWarning(1,"Warning");
HadronsLogger Hadrons::HadronsLogMessage(1,"Message");
HadronsLogger Hadrons::HadronsLogIterative(1,"Iterative");
HadronsLogger Hadrons::HadronsLogDebug(1,"Debug");
// pretty size formatting //////////////////////////////////////////////////////
std::string Hadrons::sizeString(long unsigned int bytes)
{
constexpr unsigned int bufSize = 256;
const char *suffixes[7] = {"", "K", "M", "G", "T", "P", "E"};
char buf[256];
long unsigned int s = 0;
double count = bytes;
while (count >= 1024 && s < 7)
{
s++;
count /= 1024;
}
if (count - floor(count) == 0.0)
{
snprintf(buf, bufSize, "%d %sB", (int)count, suffixes[s]);
}
else
{
snprintf(buf, bufSize, "%.1f %sB", count, suffixes[s]);
}
return std::string(buf);
}
// type utilities //////////////////////////////////////////////////////////////
constexpr unsigned int maxNameSize = 1024u;
std::string Hadrons::typeName(const std::type_info *info)
{
char *buf;
std::string name;
buf = abi::__cxa_demangle(info->name(), nullptr, nullptr, nullptr);
name = buf;
free(buf);
return name;
}

150
extras/Hadrons/Global.hpp Normal file
View File

@ -0,0 +1,150 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Global.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Global_hpp_
#define Hadrons_Global_hpp_
#include <set>
#include <stack>
#include <Grid/Grid.h>
#include <cxxabi.h>
#define BEGIN_HADRONS_NAMESPACE \
namespace Grid {\
using namespace QCD;\
namespace Hadrons {\
using Grid::operator<<;
#define END_HADRONS_NAMESPACE }}
#define BEGIN_MODULE_NAMESPACE(name)\
namespace name {\
using Grid::operator<<;
#define END_MODULE_NAMESPACE }
/* the 'using Grid::operator<<;' statement prevents a very nasty compilation
* error with GCC 5 (clang & GCC 6 compile fine without it).
*/
// FIXME: find a way to do that in a more general fashion
#ifndef FIMPL
#define FIMPL WilsonImplR
#endif
BEGIN_HADRONS_NAMESPACE
// type aliases
#define TYPE_ALIASES(FImpl, suffix)\
typedef FermionOperator<FImpl> FMat##suffix; \
typedef typename FImpl::FermionField FermionField##suffix; \
typedef typename FImpl::PropagatorField PropagatorField##suffix; \
typedef typename FImpl::SitePropagator SitePropagator##suffix; \
typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix;\
typedef std::function<void(FermionField##suffix &, \
const FermionField##suffix &)> SolverFn##suffix;
// logger
class HadronsLogger: public Logger
{
public:
HadronsLogger(int on, std::string nm): Logger("Hadrons", on, nm,
GridLogColours, "BLACK"){};
};
#define LOG(channel) std::cout << HadronsLog##channel
#define HADRON_ERROR(msg)\
LOG(Error) << msg << " (" << __FUNCTION__ << " at " << __FILE__ << ":"\
<< __LINE__ << ")" << std::endl;\
abort();
#define DEBUG_VAR(var) LOG(Debug) << #var << "= " << (var) << std::endl;
extern HadronsLogger HadronsLogError;
extern HadronsLogger HadronsLogWarning;
extern HadronsLogger HadronsLogMessage;
extern HadronsLogger HadronsLogIterative;
extern HadronsLogger HadronsLogDebug;
// singleton pattern
#define SINGLETON(name)\
public:\
name(const name &e) = delete;\
void operator=(const name &e) = delete;\
static name & getInstance(void)\
{\
static name e;\
return e;\
}\
private:\
name(void);
#define SINGLETON_DEFCTOR(name)\
public:\
name(const name &e) = delete;\
void operator=(const name &e) = delete;\
static name & getInstance(void)\
{\
static name e;\
return e;\
}\
private:\
name(void) = default;
// pretty size formating
std::string sizeString(long unsigned int bytes);
// type utilities
template <typename T>
const std::type_info * typeIdPt(const T &x)
{
return &typeid(x);
}
std::string typeName(const std::type_info *info);
template <typename T>
const std::type_info * typeIdPt(void)
{
return &typeid(T);
}
template <typename T>
std::string typeName(const T &x)
{
return typeName(typeIdPt(x));
}
template <typename T>
std::string typeName(void)
{
return typeName(typeIdPt<T>());
}
END_HADRONS_NAMESPACE
#endif // Hadrons_Global_hpp_

760
extras/Hadrons/Graph.hpp Normal file
View File

@ -0,0 +1,760 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Graph.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Graph_hpp_
#define Hadrons_Graph_hpp_
#include <Grid/Hadrons/Global.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Oriented graph class *
******************************************************************************/
// I/O for edges
template <typename T>
std::ostream & operator<<(std::ostream &out, const std::pair<T, T> &e)
{
out << "\"" << e.first << "\" -> \"" << e.second << "\"";
return out;
}
// main class
template <typename T>
class Graph
{
public:
typedef std::pair<T, T> Edge;
public:
// constructor
Graph(void);
// destructor
virtual ~Graph(void) = default;
// access
void addVertex(const T &value);
void addEdge(const Edge &e);
void addEdge(const T &start, const T &end);
std::vector<T> getVertices(void) const;
void removeVertex(const T &value);
void removeEdge(const Edge &e);
void removeEdge(const T &start, const T &end);
unsigned int size(void) const;
// tests
bool gotValue(const T &value) const;
// graph topological manipulations
std::vector<T> getAdjacentVertices(const T &value) const;
std::vector<T> getChildren(const T &value) const;
std::vector<T> getParents(const T &value) const;
std::vector<T> getRoots(void) const;
std::vector<Graph<T>> getConnectedComponents(void) const;
std::vector<T> topoSort(void);
template <typename Gen>
std::vector<T> topoSort(Gen &gen);
std::vector<std::vector<T>> allTopoSort(void);
// I/O
friend std::ostream & operator<<(std::ostream &out, const Graph<T> &g)
{
out << "{";
for (auto &e: g.edgeSet_)
{
out << e << ", ";
}
if (g.edgeSet_.size() != 0)
{
out << "\b\b";
}
out << "}";
return out;
}
private:
// vertex marking
void mark(const T &value, const bool doMark = true);
void markAll(const bool doMark = true);
void unmark(const T &value);
void unmarkAll(void);
bool isMarked(const T &value) const;
const T * getFirstMarked(const bool isMarked = true) const;
template <typename Gen>
const T * getRandomMarked(const bool isMarked, Gen &gen);
const T * getFirstUnmarked(void) const;
template <typename Gen>
const T * getRandomUnmarked(Gen &gen);
// prune marked/unmarked vertices
void removeMarked(const bool isMarked = true);
void removeUnmarked(void);
// depth-first search marking
void depthFirstSearch(void);
void depthFirstSearch(const T &root);
private:
std::map<T, bool> isMarked_;
std::set<Edge> edgeSet_;
};
// build depedency matrix from topological sorts
template <typename T>
std::map<T, std::map<T, bool>>
makeDependencyMatrix(const std::vector<std::vector<T>> &topSort);
/******************************************************************************
* template implementation *
******************************************************************************
* in all the following V is the number of vertex and E is the number of edge
* in the worst case E = V^2
*/
// constructor /////////////////////////////////////////////////////////////////
template <typename T>
Graph<T>::Graph(void)
{}
// access //////////////////////////////////////////////////////////////////////
// complexity: log(V)
template <typename T>
void Graph<T>::addVertex(const T &value)
{
isMarked_[value] = false;
}
// complexity: O(log(V))
template <typename T>
void Graph<T>::addEdge(const Edge &e)
{
addVertex(e.first);
addVertex(e.second);
edgeSet_.insert(e);
}
// complexity: O(log(V))
template <typename T>
void Graph<T>::addEdge(const T &start, const T &end)
{
addEdge(Edge(start, end));
}
template <typename T>
std::vector<T> Graph<T>::getVertices(void) const
{
std::vector<T> vertex;
for (auto &v: isMarked_)
{
vertex.push_back(v.first);
}
return vertex;
}
// complexity: O(V*log(V))
template <typename T>
void Graph<T>::removeVertex(const T &value)
{
// remove vertex from the mark table
auto vIt = isMarked_.find(value);
if (vIt != isMarked_.end())
{
isMarked_.erase(vIt);
}
else
{
HADRON_ERROR("vertex " << value << " does not exists");
}
// remove all edges containing the vertex
auto pred = [&value](const Edge &e)
{
return ((e.first == value) or (e.second == value));
};
auto eIt = find_if(edgeSet_.begin(), edgeSet_.end(), pred);
while (eIt != edgeSet_.end())
{
edgeSet_.erase(eIt);
eIt = find_if(edgeSet_.begin(), edgeSet_.end(), pred);
}
}
// complexity: O(log(V))
template <typename T>
void Graph<T>::removeEdge(const Edge &e)
{
auto eIt = edgeSet_.find(e);
if (eIt != edgeSet_.end())
{
edgeSet_.erase(eIt);
}
else
{
HADRON_ERROR("edge " << e << " does not exists");
}
}
// complexity: O(log(V))
template <typename T>
void Graph<T>::removeEdge(const T &start, const T &end)
{
removeEdge(Edge(start, end));
}
// complexity: O(1)
template <typename T>
unsigned int Graph<T>::size(void) const
{
return isMarked_.size();
}
// tests ///////////////////////////////////////////////////////////////////////
// complexity: O(log(V))
template <typename T>
bool Graph<T>::gotValue(const T &value) const
{
auto it = isMarked_.find(value);
if (it == isMarked_.end())
{
return false;
}
else
{
return true;
}
}
// vertex marking //////////////////////////////////////////////////////////////
// complexity: O(log(V))
template <typename T>
void Graph<T>::mark(const T &value, const bool doMark)
{
if (gotValue(value))
{
isMarked_[value] = doMark;
}
else
{
HADRON_ERROR("vertex " << value << " does not exists");
}
}
// complexity: O(V*log(V))
template <typename T>
void Graph<T>::markAll(const bool doMark)
{
for (auto &v: isMarked_)
{
mark(v.first, doMark);
}
}
// complexity: O(log(V))
template <typename T>
void Graph<T>::unmark(const T &value)
{
mark(value, false);
}
// complexity: O(V*log(V))
template <typename T>
void Graph<T>::unmarkAll(void)
{
markAll(false);
}
// complexity: O(log(V))
template <typename T>
bool Graph<T>::isMarked(const T &value) const
{
if (gotValue(value))
{
return isMarked_.at(value);
}
else
{
HADRON_ERROR("vertex " << value << " does not exists");
return false;
}
}
// complexity: O(log(V))
template <typename T>
const T * Graph<T>::getFirstMarked(const bool isMarked) const
{
auto pred = [&isMarked](const std::pair<T, bool> &v)
{
return (v.second == isMarked);
};
auto vIt = std::find_if(isMarked_.begin(), isMarked_.end(), pred);
if (vIt != isMarked_.end())
{
return &(vIt->first);
}
else
{
return nullptr;
}
}
// complexity: O(log(V))
template <typename T>
template <typename Gen>
const T * Graph<T>::getRandomMarked(const bool isMarked, Gen &gen)
{
auto pred = [&isMarked](const std::pair<T, bool> &v)
{
return (v.second == isMarked);
};
std::uniform_int_distribution<unsigned int> dis(0, size() - 1);
auto rIt = isMarked_.begin();
std::advance(rIt, dis(gen));
auto vIt = std::find_if(rIt, isMarked_.end(), pred);
if (vIt != isMarked_.end())
{
return &(vIt->first);
}
else
{
vIt = std::find_if(isMarked_.begin(), rIt, pred);
if (vIt != rIt)
{
return &(vIt->first);
}
else
{
return nullptr;
}
}
}
// complexity: O(log(V))
template <typename T>
const T * Graph<T>::getFirstUnmarked(void) const
{
return getFirstMarked(false);
}
// complexity: O(log(V))
template <typename T>
template <typename Gen>
const T * Graph<T>::getRandomUnmarked(Gen &gen)
{
return getRandomMarked(false, gen);
}
// prune marked/unmarked vertices //////////////////////////////////////////////
// complexity: O(V^2*log(V))
template <typename T>
void Graph<T>::removeMarked(const bool isMarked)
{
auto isMarkedCopy = isMarked_;
for (auto &v: isMarkedCopy)
{
if (v.second == isMarked)
{
removeVertex(v.first);
}
}
}
// complexity: O(V^2*log(V))
template <typename T>
void Graph<T>::removeUnmarked(void)
{
removeMarked(false);
}
// depth-first search marking //////////////////////////////////////////////////
// complexity: O(V*log(V))
template <typename T>
void Graph<T>::depthFirstSearch(void)
{
depthFirstSearch(isMarked_.begin()->first);
}
// complexity: O(V*log(V))
template <typename T>
void Graph<T>::depthFirstSearch(const T &root)
{
std::vector<T> adjacentVertex;
mark(root);
adjacentVertex = getAdjacentVertices(root);
for (auto &v: adjacentVertex)
{
if (!isMarked(v))
{
depthFirstSearch(v);
}
}
}
// graph topological manipulations /////////////////////////////////////////////
// complexity: O(V*log(V))
template <typename T>
std::vector<T> Graph<T>::getAdjacentVertices(const T &value) const
{
std::vector<T> adjacentVertex;
auto pred = [&value](const Edge &e)
{
return ((e.first == value) or (e.second == value));
};
auto eIt = find_if(edgeSet_.begin(), edgeSet_.end(), pred);
while (eIt != edgeSet_.end())
{
if (eIt->first == value)
{
adjacentVertex.push_back((*eIt).second);
}
else if (eIt->second == value)
{
adjacentVertex.push_back((*eIt).first);
}
eIt = find_if(++eIt, edgeSet_.end(), pred);
}
return adjacentVertex;
}
// complexity: O(V*log(V))
template <typename T>
std::vector<T> Graph<T>::getChildren(const T &value) const
{
std::vector<T> child;
auto pred = [&value](const Edge &e)
{
return (e.first == value);
};
auto eIt = find_if(edgeSet_.begin(), edgeSet_.end(), pred);
while (eIt != edgeSet_.end())
{
child.push_back((*eIt).second);
eIt = find_if(++eIt, edgeSet_.end(), pred);
}
return child;
}
// complexity: O(V*log(V))
template <typename T>
std::vector<T> Graph<T>::getParents(const T &value) const
{
std::vector<T> parent;
auto pred = [&value](const Edge &e)
{
return (e.second == value);
};
auto eIt = find_if(edgeSet_.begin(), edgeSet_.end(), pred);
while (eIt != edgeSet_.end())
{
parent.push_back((*eIt).first);
eIt = find_if(++eIt, edgeSet_.end(), pred);
}
return parent;
}
// complexity: O(V^2*log(V))
template <typename T>
std::vector<T> Graph<T>::getRoots(void) const
{
std::vector<T> root;
for (auto &v: isMarked_)
{
auto parent = getParents(v.first);
if (parent.size() == 0)
{
root.push_back(v.first);
}
}
return root;
}
// complexity: O(V^2*log(V))
template <typename T>
std::vector<Graph<T>> Graph<T>::getConnectedComponents(void) const
{
std::vector<Graph<T>> res;
Graph<T> copy(*this);
while (copy.size() > 0)
{
copy.depthFirstSearch();
res.push_back(copy);
res.back().removeUnmarked();
res.back().unmarkAll();
copy.removeMarked();
copy.unmarkAll();
}
return res;
}
// topological sort using a directed DFS algorithm
// complexity: O(V*log(V))
template <typename T>
std::vector<T> Graph<T>::topoSort(void)
{
std::stack<T> buf;
std::vector<T> res;
const T *vPt;
std::map<T, bool> tmpMarked(isMarked_);
// visit function
std::function<void(const T &)> visit = [&](const T &v)
{
if (tmpMarked.at(v))
{
HADRON_ERROR("cannot topologically sort a cyclic graph");
}
if (!isMarked(v))
{
std::vector<T> child = getChildren(v);
tmpMarked[v] = true;
for (auto &c: child)
{
visit(c);
}
mark(v);
tmpMarked[v] = false;
buf.push(v);
}
};
// reset temporary marks
for (auto &v: tmpMarked)
{
tmpMarked.at(v.first) = false;
}
// loop on unmarked vertices
unmarkAll();
vPt = getFirstUnmarked();
while (vPt)
{
visit(*vPt);
vPt = getFirstUnmarked();
}
unmarkAll();
// create result vector
while (!buf.empty())
{
res.push_back(buf.top());
buf.pop();
}
return res;
}
// random version of the topological sort
// complexity: O(V*log(V))
template <typename T>
template <typename Gen>
std::vector<T> Graph<T>::topoSort(Gen &gen)
{
std::stack<T> buf;
std::vector<T> res;
const T *vPt;
std::map<T, bool> tmpMarked(isMarked_);
// visit function
std::function<void(const T &)> visit = [&](const T &v)
{
if (tmpMarked.at(v))
{
HADRON_ERROR("cannot topologically sort a cyclic graph");
}
if (!isMarked(v))
{
std::vector<T> child = getChildren(v);
tmpMarked[v] = true;
std::shuffle(child.begin(), child.end(), gen);
for (auto &c: child)
{
visit(c);
}
mark(v);
tmpMarked[v] = false;
buf.push(v);
}
};
// reset temporary marks
for (auto &v: tmpMarked)
{
tmpMarked.at(v.first) = false;
}
// loop on unmarked vertices
unmarkAll();
vPt = getRandomUnmarked(gen);
while (vPt)
{
visit(*vPt);
vPt = getRandomUnmarked(gen);
}
unmarkAll();
// create result vector
while (!buf.empty())
{
res.push_back(buf.top());
buf.pop();
}
return res;
}
// generate all possible topological sorts
// Y. L. Varol & D. Rotem, Comput. J. 24(1), pp. 8384, 1981
// http://comjnl.oupjournals.org/cgi/doi/10.1093/comjnl/24.1.83
// complexity: O(V*log(V)) (from the paper, but really ?)
template <typename T>
std::vector<std::vector<T>> Graph<T>::allTopoSort(void)
{
std::vector<std::vector<T>> res;
std::map<T, std::map<T, bool>> iMat;
// create incidence matrix
for (auto &v1: isMarked_)
for (auto &v2: isMarked_)
{
iMat[v1.first][v2.first] = false;
}
for (auto &v: isMarked_)
{
auto cVec = getChildren(v.first);
for (auto &c: cVec)
{
iMat[v.first][c] = true;
}
}
// generate initial topological sort
res.push_back(topoSort());
// generate all other topological sorts by permutation
std::vector<T> p = res[0];
const unsigned int n = size();
std::vector<unsigned int> loc(n);
unsigned int i, k, k1;
T obj_k, obj_k1;
bool isFinal;
for (unsigned int j = 0; j < n; ++j)
{
loc[j] = j;
}
i = 0;
while (i < n-1)
{
k = loc[i];
k1 = k + 1;
obj_k = p[k];
if (k1 >= n)
{
isFinal = true;
obj_k1 = obj_k;
}
else
{
isFinal = false;
obj_k1 = p[k1];
}
if (iMat[res[0][i]][obj_k1] or isFinal)
{
for (unsigned int l = k; l >= i + 1; --l)
{
p[l] = p[l-1];
}
p[i] = obj_k;
loc[i] = i;
i++;
}
else
{
p[k] = obj_k1;
p[k1] = obj_k;
loc[i] = k1;
i = 0;
res.push_back(p);
}
}
return res;
}
// build depedency matrix from topological sorts ///////////////////////////////
// complexity: something like O(V^2*log(V!))
template <typename T>
std::map<T, std::map<T, bool>>
makeDependencyMatrix(const std::vector<std::vector<T>> &topSort)
{
std::map<T, std::map<T, bool>> m;
const std::vector<T> &vList = topSort[0];
for (auto &v1: vList)
for (auto &v2: vList)
{
bool dep = true;
for (auto &t: topSort)
{
auto i1 = std::find(t.begin(), t.end(), v1);
auto i2 = std::find(t.begin(), t.end(), v2);
dep = dep and (i1 - i2 > 0);
if (!dep) break;
}
m[v1][v2] = dep;
}
return m;
}
END_HADRONS_NAMESPACE
#endif // Hadrons_Graph_hpp_

View File

@ -0,0 +1,80 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/HadronsXmlRun.cc
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Application.hpp>
using namespace Grid;
using namespace QCD;
using namespace Hadrons;
int main(int argc, char *argv[])
{
// parse command line
std::string parameterFileName, scheduleFileName = "";
if (argc < 2)
{
std::cerr << "usage: " << argv[0] << " <parameter file> [<precomputed schedule>] [Grid options]";
std::cerr << std::endl;
std::exit(EXIT_FAILURE);
}
parameterFileName = argv[1];
if (argc > 2)
{
if (argv[2][0] != '-')
{
scheduleFileName = argv[2];
}
}
// initialization
Grid_init(&argc, &argv);
HadronsLogError.Active(GridLogError.isActive());
HadronsLogWarning.Active(GridLogWarning.isActive());
HadronsLogMessage.Active(GridLogMessage.isActive());
HadronsLogIterative.Active(GridLogIterative.isActive());
HadronsLogDebug.Active(GridLogDebug.isActive());
LOG(Message) << "Grid initialized" << std::endl;
// execution
Application application(parameterFileName);
application.parseParameterFile(parameterFileName);
if (!scheduleFileName.empty())
{
application.loadSchedule(scheduleFileName);
}
application.run();
// epilogue
LOG(Message) << "Grid is finalizing now" << std::endl;
Grid_finalize();
return EXIT_SUCCESS;
}

View File

@ -0,0 +1,72 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/HadronsXmlSchedule.cc
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Application.hpp>
using namespace Grid;
using namespace QCD;
using namespace Hadrons;
int main(int argc, char *argv[])
{
// parse command line
std::string parameterFileName, scheduleFileName;
if (argc < 3)
{
std::cerr << "usage: " << argv[0] << " <parameter file> <schedule output> [Grid options]";
std::cerr << std::endl;
std::exit(EXIT_FAILURE);
}
parameterFileName = argv[1];
scheduleFileName = argv[2];
// initialization
Grid_init(&argc, &argv);
HadronsLogError.Active(GridLogError.isActive());
HadronsLogWarning.Active(GridLogWarning.isActive());
HadronsLogMessage.Active(GridLogMessage.isActive());
HadronsLogIterative.Active(GridLogIterative.isActive());
HadronsLogDebug.Active(GridLogDebug.isActive());
LOG(Message) << "Grid initialized" << std::endl;
// execution
Application application;
application.parseParameterFile(parameterFileName);
application.schedule();
application.printSchedule();
application.saveSchedule(scheduleFileName);
// epilogue
LOG(Message) << "Grid is finalizing now" << std::endl;
Grid_finalize();
return EXIT_SUCCESS;
}

View File

@ -0,0 +1,29 @@
lib_LIBRARIES = libHadrons.a
bin_PROGRAMS = HadronsXmlRun HadronsXmlSchedule
include modules.inc
libHadrons_a_SOURCES = \
$(modules_cc) \
Application.cc \
Environment.cc \
Global.cc \
Module.cc
libHadrons_adir = $(pkgincludedir)/Hadrons
nobase_libHadrons_a_HEADERS = \
$(modules_hpp) \
Application.hpp \
Environment.hpp \
Factory.hpp \
GeneticScheduler.hpp \
Global.hpp \
Graph.hpp \
Module.hpp \
Modules.hpp \
ModuleFactory.hpp
HadronsXmlRun_SOURCES = HadronsXmlRun.cc
HadronsXmlRun_LDADD = libHadrons.a -lGrid
HadronsXmlSchedule_SOURCES = HadronsXmlSchedule.cc
HadronsXmlSchedule_LDADD = libHadrons.a -lGrid

71
extras/Hadrons/Module.cc Normal file
View File

@ -0,0 +1,71 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Module.cc
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Module.hpp>
using namespace Grid;
using namespace QCD;
using namespace Hadrons;
/******************************************************************************
* ModuleBase implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
ModuleBase::ModuleBase(const std::string name)
: name_(name)
, env_(Environment::getInstance())
{}
// access //////////////////////////////////////////////////////////////////////
std::string ModuleBase::getName(void) const
{
return name_;
}
Environment & ModuleBase::env(void) const
{
return env_;
}
// get factory registration name if available
std::string ModuleBase::getRegisteredName(void)
{
HADRON_ERROR("module '" + getName() + "' has a type not registered"
+ " in the factory");
}
// execution ///////////////////////////////////////////////////////////////////
void ModuleBase::operator()(void)
{
setup();
if (!env().isDryRun())
{
execute();
}
}

198
extras/Hadrons/Module.hpp Normal file
View File

@ -0,0 +1,198 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Module.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Module_hpp_
#define Hadrons_Module_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Environment.hpp>
BEGIN_HADRONS_NAMESPACE
// module registration macros
#define MODULE_REGISTER(mod, base)\
class mod: public base\
{\
public:\
typedef base Base;\
using Base::Base;\
virtual std::string getRegisteredName(void)\
{\
return std::string(#mod);\
}\
};\
class mod##ModuleRegistrar\
{\
public:\
mod##ModuleRegistrar(void)\
{\
ModuleFactory &modFac = ModuleFactory::getInstance();\
modFac.registerBuilder(#mod, [&](const std::string name)\
{\
return std::unique_ptr<mod>(new mod(name));\
});\
}\
};\
static mod##ModuleRegistrar mod##ModuleRegistrarInstance;
#define MODULE_REGISTER_NS(mod, base, ns)\
class mod: public base\
{\
public:\
typedef base Base;\
using Base::Base;\
virtual std::string getRegisteredName(void)\
{\
return std::string(#ns "::" #mod);\
}\
};\
class ns##mod##ModuleRegistrar\
{\
public:\
ns##mod##ModuleRegistrar(void)\
{\
ModuleFactory &modFac = ModuleFactory::getInstance();\
modFac.registerBuilder(#ns "::" #mod, [&](const std::string name)\
{\
return std::unique_ptr<ns::mod>(new ns::mod(name));\
});\
}\
};\
static ns##mod##ModuleRegistrar ns##mod##ModuleRegistrarInstance;
#define ARG(...) __VA_ARGS__
/******************************************************************************
* Module class *
******************************************************************************/
// base class
class ModuleBase
{
public:
// constructor
ModuleBase(const std::string name);
// destructor
virtual ~ModuleBase(void) = default;
// access
std::string getName(void) const;
Environment &env(void) const;
// get factory registration name if available
virtual std::string getRegisteredName(void);
// dependencies/products
virtual std::vector<std::string> getInput(void) = 0;
virtual std::vector<std::string> getOutput(void) = 0;
// parse parameters
virtual void parseParameters(XmlReader &reader, const std::string name) = 0;
virtual void saveParameters(XmlWriter &writer, const std::string name) = 0;
// setup
virtual void setup(void) {};
// execution
void operator()(void);
virtual void execute(void) = 0;
private:
std::string name_;
Environment &env_;
};
// derived class, templating the parameter class
template <typename P>
class Module: public ModuleBase
{
public:
typedef P Par;
public:
// constructor
Module(const std::string name);
// destructor
virtual ~Module(void) = default;
// parse parameters
virtual void parseParameters(XmlReader &reader, const std::string name);
virtual void saveParameters(XmlWriter &writer, const std::string name);
// parameter access
const P & par(void) const;
void setPar(const P &par);
private:
P par_;
};
// no parameter type
class NoPar {};
template <>
class Module<NoPar>: public ModuleBase
{
public:
// constructor
Module(const std::string name): ModuleBase(name) {};
// destructor
virtual ~Module(void) = default;
// parse parameters (do nothing)
virtual void parseParameters(XmlReader &reader, const std::string name) {};
virtual void saveParameters(XmlWriter &writer, const std::string name)
{
push(writer, "options");
pop(writer);
};
};
/******************************************************************************
* Template implementation *
******************************************************************************/
template <typename P>
Module<P>::Module(const std::string name)
: ModuleBase(name)
{}
template <typename P>
void Module<P>::parseParameters(XmlReader &reader, const std::string name)
{
read(reader, name, par_);
}
template <typename P>
void Module<P>::saveParameters(XmlWriter &writer, const std::string name)
{
write(writer, name, par_);
}
template <typename P>
const P & Module<P>::par(void) const
{
return par_;
}
template <typename P>
void Module<P>::setPar(const P &par)
{
par_ = par;
}
END_HADRONS_NAMESPACE
#endif // Hadrons_Module_hpp_

View File

@ -0,0 +1,49 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/ModuleFactory.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_ModuleFactory_hpp_
#define Hadrons_ModuleFactory_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Factory.hpp>
#include <Grid/Hadrons/Module.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* ModuleFactory *
******************************************************************************/
class ModuleFactory: public Factory<ModuleBase>
{
SINGLETON_DEFCTOR(ModuleFactory)
};
END_HADRONS_NAMESPACE
#endif // Hadrons_ModuleFactory_hpp_

View File

@ -0,0 +1,40 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MAction/DWF.hpp>
#include <Grid/Hadrons/Modules/MAction/Wilson.hpp>
#include <Grid/Hadrons/Modules/MContraction/Baryon.hpp>
#include <Grid/Hadrons/Modules/MContraction/Meson.hpp>
#include <Grid/Hadrons/Modules/MGauge/Load.hpp>
#include <Grid/Hadrons/Modules/MGauge/Random.hpp>
#include <Grid/Hadrons/Modules/MGauge/Unit.hpp>
#include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp>
#include <Grid/Hadrons/Modules/MSource/Point.hpp>
#include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp>
#include <Grid/Hadrons/Modules/MSource/Z2.hpp>
#include <Grid/Hadrons/Modules/Quark.hpp>

View File

@ -0,0 +1,134 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MAction/DWF.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_DWF_hpp_
#define Hadrons_DWF_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Domain wall quark action *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MAction)
class DWFPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(DWFPar,
std::string, gauge,
unsigned int, Ls,
double , mass,
double , M5);
};
template <typename FImpl>
class TDWF: public Module<DWFPar>
{
public:
TYPE_ALIASES(FImpl,);
public:
// constructor
TDWF(const std::string name);
// destructor
virtual ~TDWF(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(DWF, TDWF<FIMPL>, MAction);
/******************************************************************************
* DWF template implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TDWF<FImpl>::TDWF(const std::string name)
: Module<DWFPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TDWF<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().gauge};
return in;
}
template <typename FImpl>
std::vector<std::string> TDWF<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TDWF<FImpl>::setup(void)
{
unsigned int size;
size = 2*env().template lattice4dSize<typename FImpl::DoubledGaugeField>();
env().registerObject(getName(), size, par().Ls);
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TDWF<FImpl>::execute(void)
{
LOG(Message) << "Setting up domain wall fermion matrix with m= "
<< par().mass << ", M5= " << par().M5 << " and Ls= "
<< par().Ls << " using gauge field '" << par().gauge << "'"
<< std::endl;
env().createGrid(par().Ls);
auto &U = *env().template getObject<LatticeGaugeField>(par().gauge);
auto &g4 = *env().getGrid();
auto &grb4 = *env().getRbGrid();
auto &g5 = *env().getGrid(par().Ls);
auto &grb5 = *env().getRbGrid(par().Ls);
FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4,
par().mass, par().M5);
env().setObject(getName(), fMatPt);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_DWF_hpp_

View File

@ -0,0 +1,126 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MAction/Wilson.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Wilson_hpp_
#define Hadrons_Wilson_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* TWilson quark action *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MAction)
class WilsonPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar,
std::string, gauge,
double , mass);
};
template <typename FImpl>
class TWilson: public Module<WilsonPar>
{
public:
TYPE_ALIASES(FImpl,);
public:
// constructor
TWilson(const std::string name);
// destructor
virtual ~TWilson(void) = default;
// dependencies/products
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(Wilson, TWilson<FIMPL>, MAction);
/******************************************************************************
* TWilson template implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TWilson<FImpl>::TWilson(const std::string name)
: Module<WilsonPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TWilson<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().gauge};
return in;
}
template <typename FImpl>
std::vector<std::string> TWilson<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TWilson<FImpl>::setup(void)
{
unsigned int size;
size = 2*env().template lattice4dSize<typename FImpl::DoubledGaugeField>();
env().registerObject(getName(), size);
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TWilson<FImpl>::execute()
{
LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass
<< " using gauge field '" << par().gauge << "'" << std::endl;
auto &U = *env().template getObject<LatticeGaugeField>(par().gauge);
auto &grid = *env().getGrid();
auto &gridRb = *env().getRbGrid();
FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass);
env().setObject(getName(), fMatPt);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Wilson_hpp_

View File

@ -0,0 +1,131 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/Baryon.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Baryon_hpp_
#define Hadrons_Baryon_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Baryon *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
class BaryonPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(BaryonPar,
std::string, q1,
std::string, q2,
std::string, q3,
std::string, output);
};
template <typename FImpl1, typename FImpl2, typename FImpl3>
class TBaryon: public Module<BaryonPar>
{
public:
TYPE_ALIASES(FImpl1, 1);
TYPE_ALIASES(FImpl2, 2);
TYPE_ALIASES(FImpl3, 3);
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
std::vector<std::vector<std::vector<Complex>>>, corr);
};
public:
// constructor
TBaryon(const std::string name);
// destructor
virtual ~TBaryon(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(Baryon, ARG(TBaryon<FIMPL, FIMPL, FIMPL>), MContraction);
/******************************************************************************
* TBaryon implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
TBaryon<FImpl1, FImpl2, FImpl3>::TBaryon(const std::string name)
: Module<BaryonPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
std::vector<std::string> TBaryon<FImpl1, FImpl2, FImpl3>::getInput(void)
{
std::vector<std::string> input = {par().q1, par().q2, par().q3};
return input;
}
template <typename FImpl1, typename FImpl2, typename FImpl3>
std::vector<std::string> TBaryon<FImpl1, FImpl2, FImpl3>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void)
{
LOG(Message) << "Computing baryon contractions '" << getName() << "' using"
<< " quarks '" << par().q1 << "', '" << par().q2 << "', and '"
<< par().q3 << "'" << std::endl;
XmlWriter writer(par().output);
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
PropagatorField3 &q3 = *env().template getObject<PropagatorField3>(par().q2);
LatticeComplex c(env().getGrid());
Result result;
// FIXME: do contractions
write(writer, "meson", result);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Baryon_hpp_

View File

@ -0,0 +1,137 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/Meson.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Meson_hpp_
#define Hadrons_Meson_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* TMeson *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
class MesonPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar,
std::string, q1,
std::string, q2,
std::string, output,
Gamma::Algebra, gammaSource,
Gamma::Algebra, gammaSink);
};
template <typename FImpl1, typename FImpl2>
class TMeson: public Module<MesonPar>
{
public:
TYPE_ALIASES(FImpl1, 1);
TYPE_ALIASES(FImpl2, 2);
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result, std::vector<Complex>, corr);
};
public:
// constructor
TMeson(const std::string name);
// destructor
virtual ~TMeson(void) = default;
// dependencies/products
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(Meson, ARG(TMeson<FIMPL, FIMPL>), MContraction);
/******************************************************************************
* TMeson implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2>
TMeson<FImpl1, FImpl2>::TMeson(const std::string name)
: Module<MesonPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2>
std::vector<std::string> TMeson<FImpl1, FImpl2>::getInput(void)
{
std::vector<std::string> input = {par().q1, par().q2};
return input;
}
template <typename FImpl1, typename FImpl2>
std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void)
{
std::vector<std::string> output = {getName()};
return output;
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2>
void TMeson<FImpl1, FImpl2>::execute(void)
{
LOG(Message) << "Computing meson contractions '" << getName() << "' using"
<< " quarks '" << par().q1 << "' and '" << par().q2 << "'"
<< std::endl;
XmlWriter writer(par().output);
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
LatticeComplex c(env().getGrid());
Gamma gSrc(par().gammaSource), gSnk(par().gammaSink);
Gamma g5(Gamma::Algebra::Gamma5);
std::vector<TComplex> buf;
Result result;
c = trace(gSnk*q1*adj(gSrc)*g5*adj(q2)*g5);
sliceSum(c, buf, Tp);
result.corr.resize(buf.size());
for (unsigned int t = 0; t < buf.size(); ++t)
{
result.corr[t] = TensorRemove(buf[t]);
}
write(writer, "meson", result);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Meson_hpp_

View File

@ -0,0 +1,78 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MGauge/Load.cc
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MGauge/Load.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MGauge;
/******************************************************************************
* TLoad implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TLoad::TLoad(const std::string name)
: Module<LoadPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TLoad::getInput(void)
{
std::vector<std::string> in;
return in;
}
std::vector<std::string> TLoad::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void TLoad::setup(void)
{
env().registerLattice<LatticeGaugeField>(getName());
}
// execution ///////////////////////////////////////////////////////////////////
void TLoad::execute(void)
{
NerscField header;
std::string fileName = par().file + "."
+ std::to_string(env().getTrajectory());
LOG(Message) << "Loading NERSC configuration from file '" << fileName
<< "'" << std::endl;
LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName());
NerscIO::readConfiguration(U, header, fileName);
LOG(Message) << "NERSC header:" << std::endl;
dump_nersc_header(header, LOG(Message));
}

View File

@ -0,0 +1,73 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MGauge/Load.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Load_hpp_
#define Hadrons_Load_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Load a NERSC configuration *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MGauge)
class LoadPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(LoadPar,
std::string, file);
};
class TLoad: public Module<LoadPar>
{
public:
// constructor
TLoad(const std::string name);
// destructor
virtual ~TLoad(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(Load, TLoad, MGauge);
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Load_hpp_

View File

@ -0,0 +1,69 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MGauge/Random.cc
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MGauge/Random.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MGauge;
/******************************************************************************
* TRandom implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TRandom::TRandom(const std::string name)
: Module<NoPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TRandom::getInput(void)
{
return std::vector<std::string>();
}
std::vector<std::string> TRandom::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void TRandom::setup(void)
{
env().registerLattice<LatticeGaugeField>(getName());
}
// execution ///////////////////////////////////////////////////////////////////
void TRandom::execute(void)
{
LOG(Message) << "Generating random gauge configuration" << std::endl;
LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName());
SU3::HotConfiguration(*env().get4dRng(), U);
}

View File

@ -0,0 +1,66 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MGauge/Random.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Random_hpp_
#define Hadrons_Random_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Random gauge *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MGauge)
class TRandom: public Module<NoPar>
{
public:
// constructor
TRandom(const std::string name);
// destructor
virtual ~TRandom(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(Random, TRandom, MGauge);
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Random_hpp_

View File

@ -0,0 +1,69 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MGauge/Unit.cc
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MGauge/Unit.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MGauge;
/******************************************************************************
* TUnit implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TUnit::TUnit(const std::string name)
: Module<NoPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TUnit::getInput(void)
{
return std::vector<std::string>();
}
std::vector<std::string> TUnit::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void TUnit::setup(void)
{
env().registerLattice<LatticeGaugeField>(getName());
}
// execution ///////////////////////////////////////////////////////////////////
void TUnit::execute(void)
{
LOG(Message) << "Creating unit gauge configuration" << std::endl;
LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName());
SU3::ColdConfiguration(*env().get4dRng(), U);
}

View File

@ -0,0 +1,66 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MGauge/Unit.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Unit_hpp_
#define Hadrons_Unit_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Unit gauge *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MGauge)
class TUnit: public Module<NoPar>
{
public:
// constructor
TUnit(const std::string name);
// destructor
virtual ~TUnit(void) = default;
// dependencies/products
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(Unit, TUnit, MGauge);
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Unit_hpp_

View File

@ -0,0 +1,132 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MSolver/RBPrecCG.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_RBPrecCG_hpp_
#define Hadrons_RBPrecCG_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Schur red-black preconditioned CG *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MSolver)
class RBPrecCGPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(RBPrecCGPar,
std::string, action,
double , residual);
};
template <typename FImpl>
class TRBPrecCG: public Module<RBPrecCGPar>
{
public:
TYPE_ALIASES(FImpl,);
public:
// constructor
TRBPrecCG(const std::string name);
// destructor
virtual ~TRBPrecCG(void) = default;
// dependencies/products
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(RBPrecCG, TRBPrecCG<FIMPL>, MSolver);
/******************************************************************************
* TRBPrecCG template implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TRBPrecCG<FImpl>::TRBPrecCG(const std::string name)
: Module(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TRBPrecCG<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().action};
return in;
}
template <typename FImpl>
std::vector<std::string> TRBPrecCG<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TRBPrecCG<FImpl>::setup(void)
{
auto Ls = env().getObjectLs(par().action);
env().registerObject(getName(), 0, Ls);
env().addOwnership(getName(), par().action);
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TRBPrecCG<FImpl>::execute(void)
{
auto &mat = *(env().template getObject<FMat>(par().action));
auto solver = [&mat, this](FermionField &sol, const FermionField &source)
{
ConjugateGradient<FermionField> cg(par().residual, 10000);
SchurRedBlackDiagMooeeSolve<FermionField> schurSolver(cg);
schurSolver(mat, source, sol);
};
LOG(Message) << "setting up Schur red-black preconditioned CG for"
<< " action '" << par().action << "' with residual "
<< par().residual << std::endl;
env().setObject(getName(), new SolverFn(solver));
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_RBPrecCG_hpp_

View File

@ -0,0 +1,135 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MSource/Point.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Point_hpp_
#define Hadrons_Point_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
Point source
------------
* src_x = delta_x,position
* options:
- position: space-separated integer sequence (e.g. "0 1 1 0")
*/
/******************************************************************************
* TPoint *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MSource)
class PointPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(PointPar,
std::string, position);
};
template <typename FImpl>
class TPoint: public Module<PointPar>
{
public:
TYPE_ALIASES(FImpl,);
public:
// constructor
TPoint(const std::string name);
// destructor
virtual ~TPoint(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSource);
/******************************************************************************
* TPoint template implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TPoint<FImpl>::TPoint(const std::string name)
: Module<PointPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TPoint<FImpl>::getInput(void)
{
std::vector<std::string> in;
return in;
}
template <typename FImpl>
std::vector<std::string> TPoint<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TPoint<FImpl>::setup(void)
{
env().template registerLattice<PropagatorField>(getName());
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TPoint<FImpl>::execute(void)
{
std::vector<int> position = strToVec<int>(par().position);
typename SitePropagator::scalar_object id;
LOG(Message) << "Creating point source at position [" << par().position
<< "]" << std::endl;
PropagatorField &src = *env().template createLattice<PropagatorField>(getName());
id = 1.;
src = zero;
pokeSite(id, src, position);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Point_hpp_

View File

@ -0,0 +1,163 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MSource/SeqGamma.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_SeqGamma_hpp_
#define Hadrons_SeqGamma_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
Sequential source
-----------------------------
* src_x = q_x * theta(x_3 - tA) * theta(tB - x_3) * gamma * exp(i x.mom)
* options:
- q: input propagator (string)
- tA: begin timeslice (integer)
- tB: end timesilce (integer)
- gamma: gamma product to insert (integer)
- mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0.")
*/
/******************************************************************************
* SeqGamma *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MSource)
class SeqGammaPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(SeqGammaPar,
std::string, q,
unsigned int, tA,
unsigned int, tB,
Gamma::Algebra, gamma,
std::string, mom);
};
template <typename FImpl>
class TSeqGamma: public Module<SeqGammaPar>
{
public:
TYPE_ALIASES(FImpl,);
public:
// constructor
TSeqGamma(const std::string name);
// destructor
virtual ~TSeqGamma(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(SeqGamma, TSeqGamma<FIMPL>, MSource);
/******************************************************************************
* TSeqGamma implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TSeqGamma<FImpl>::TSeqGamma(const std::string name)
: Module<SeqGammaPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TSeqGamma<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().q};
return in;
}
template <typename FImpl>
std::vector<std::string> TSeqGamma<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TSeqGamma<FImpl>::setup(void)
{
env().template registerLattice<PropagatorField>(getName());
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TSeqGamma<FImpl>::execute(void)
{
if (par().tA == par().tB)
{
LOG(Message) << "Generating gamma_" << par().gamma
<< " sequential source at t= " << par().tA << std::endl;
}
else
{
LOG(Message) << "Generating gamma_" << par().gamma
<< " sequential source for "
<< par().tA << " <= t <= " << par().tB << std::endl;
}
PropagatorField &src = *env().template createLattice<PropagatorField>(getName());
PropagatorField &q = *env().template getObject<PropagatorField>(par().q);
Lattice<iScalar<vInteger>> t(env().getGrid());
LatticeComplex ph(env().getGrid()), coor(env().getGrid());
Gamma g(par().gamma);
std::vector<Real> p;
Complex i(0.0,1.0);
p = strToVec<Real>(par().mom);
ph = zero;
for(unsigned int mu = 0; mu < env().getNd(); mu++)
{
LatticeCoordinate(coor, mu);
ph = ph + p[mu]*coor;
}
ph = exp(i*ph);
LatticeCoordinate(t, Tp);
src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_SeqGamma_hpp_

View File

@ -0,0 +1,151 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MSource/Z2.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Z2_hpp_
#define Hadrons_Z2_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
Z_2 stochastic source
-----------------------------
* src_x = eta_x * theta(x_3 - tA) * theta(tB - x_3)
the eta_x are independent uniform random numbers in {+/- 1 +/- i}
* options:
- tA: begin timeslice (integer)
- tB: end timesilce (integer)
*/
/******************************************************************************
* Z2 stochastic source *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MSource)
class Z2Par: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Z2Par,
unsigned int, tA,
unsigned int, tB);
};
template <typename FImpl>
class TZ2: public Module<Z2Par>
{
public:
TYPE_ALIASES(FImpl,);
public:
// constructor
TZ2(const std::string name);
// destructor
virtual ~TZ2(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(Z2, TZ2<FIMPL>, MSource);
/******************************************************************************
* TZ2 template implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TZ2<FImpl>::TZ2(const std::string name)
: Module<Z2Par>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TZ2<FImpl>::getInput(void)
{
std::vector<std::string> in;
return in;
}
template <typename FImpl>
std::vector<std::string> TZ2<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TZ2<FImpl>::setup(void)
{
env().template registerLattice<PropagatorField>(getName());
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TZ2<FImpl>::execute(void)
{
Lattice<iScalar<vInteger>> t(env().getGrid());
LatticeComplex eta(env().getGrid());
Complex shift(1., 1.);
if (par().tA == par().tB)
{
LOG(Message) << "Generating Z_2 wall source at t= " << par().tA
<< std::endl;
}
else
{
LOG(Message) << "Generating Z_2 band for " << par().tA << " <= t <= "
<< par().tB << std::endl;
}
PropagatorField &src = *env().template createLattice<PropagatorField>(getName());
LatticeCoordinate(t, Tp);
bernoulli(*env().get4dRng(), eta);
eta = (2.*eta - shift)*(1./::sqrt(2.));
eta = where((t >= par().tA) and (t <= par().tB), eta, 0.*eta);
src = 1.;
src = src*eta;
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Z2_hpp_

View File

@ -0,0 +1,185 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/Quark.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Quark_hpp_
#define Hadrons_Quark_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* TQuark *
******************************************************************************/
class QuarkPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(QuarkPar,
std::string, source,
std::string, solver);
};
template <typename FImpl>
class TQuark: public Module<QuarkPar>
{
public:
TYPE_ALIASES(FImpl,);
public:
// constructor
TQuark(const std::string name);
// destructor
virtual ~TQuark(void) = default;
// dependencies/products
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
private:
unsigned int Ls_;
SolverFn *solver_{nullptr};
};
MODULE_REGISTER(Quark, TQuark<FIMPL>);
/******************************************************************************
* TQuark implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TQuark<FImpl>::TQuark(const std::string name)
: Module(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TQuark<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().source, par().solver};
return in;
}
template <typename FImpl>
std::vector<std::string> TQuark<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName(), getName() + "_5d"};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TQuark<FImpl>::setup(void)
{
Ls_ = env().getObjectLs(par().solver);
env().template registerLattice<PropagatorField>(getName());
if (Ls_ > 1)
{
env().template registerLattice<PropagatorField>(getName() + "_5d", Ls_);
}
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TQuark<FImpl>::execute(void)
{
LOG(Message) << "Computing quark propagator '" << getName() << "'"
<< std::endl;
FermionField source(env().getGrid(Ls_)), sol(env().getGrid(Ls_)),
tmp(env().getGrid());
std::string propName = (Ls_ == 1) ? getName() : (getName() + "_5d");
PropagatorField &prop = *env().template createLattice<PropagatorField>(propName);
PropagatorField &fullSrc = *env().template getObject<PropagatorField>(par().source);
SolverFn &solver = *env().template getObject<SolverFn>(par().solver);
if (Ls_ > 1)
{
env().template createLattice<PropagatorField>(getName());
}
LOG(Message) << "Inverting using solver '" << par().solver
<< "' on source '" << par().source << "'" << std::endl;
for (unsigned int s = 0; s < Ns; ++s)
for (unsigned int c = 0; c < Nc; ++c)
{
LOG(Message) << "Inversion for spin= " << s << ", color= " << c
<< std::endl;
// source conversion for 4D sources
if (!env().isObject5d(par().source))
{
if (Ls_ == 1)
{
PropToFerm(source, fullSrc, s, c);
}
else
{
source = zero;
PropToFerm(tmp, fullSrc, s, c);
InsertSlice(tmp, source, 0, 0);
InsertSlice(tmp, source, Ls_-1, 0);
axpby_ssp_pplus(source, 0., source, 1., source, 0, 0);
axpby_ssp_pminus(source, 0., source, 1., source, Ls_-1, Ls_-1);
}
}
// source conversion for 5D sources
else
{
if (Ls_ != env().getObjectLs(par().source))
{
HADRON_ERROR("Ls mismatch between quark action and source");
}
else
{
PropToFerm(source, fullSrc, s, c);
}
}
sol = zero;
solver(sol, source);
FermToProp(prop, sol, s, c);
// create 4D propagators from 5D one if necessary
if (Ls_ > 1)
{
PropagatorField &p4d =
*env().template getObject<PropagatorField>(getName());
axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0);
axpby_ssp_pplus(sol, 0., sol, 1., sol, 0, Ls_-1);
ExtractSlice(tmp, sol, 0, 0);
FermToProp(p4d, tmp, s, c);
}
}
}
END_HADRONS_NAMESPACE
#endif // Hadrons_Quark_hpp_

View File

@ -0,0 +1,39 @@
#include <Grid/Hadrons/Modules/___FILEBASENAME___.hpp>
using namespace Grid;
using namespace Hadrons;
/******************************************************************************
* T___FILEBASENAME___ implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
T___FILEBASENAME___::T___FILEBASENAME___(const std::string name)
: Module<___FILEBASENAME___Par>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> T___FILEBASENAME___::getInput(void)
{
std::vector<std::string> in;
return in;
}
std::vector<std::string> T___FILEBASENAME___::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void T___FILEBASENAME___::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
void T___FILEBASENAME___::execute(void)
{
}

View File

@ -0,0 +1,40 @@
#ifndef Hadrons____FILEBASENAME____hpp_
#define Hadrons____FILEBASENAME____hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* ___FILEBASENAME___ *
******************************************************************************/
class ___FILEBASENAME___Par: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(___FILEBASENAME___Par,
unsigned int, i);
};
class T___FILEBASENAME___: public Module<___FILEBASENAME___Par>
{
public:
// constructor
T___FILEBASENAME___(const std::string name);
// destructor
virtual ~T___FILEBASENAME___(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER(___FILEBASENAME___, T___FILEBASENAME___);
END_HADRONS_NAMESPACE
#endif // Hadrons____FILEBASENAME____hpp_

View File

@ -0,0 +1,40 @@
#include <Grid/Hadrons/Modules/___NAMESPACE___/___FILEBASENAME___.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace ___NAMESPACE___;
/******************************************************************************
* T___FILEBASENAME___ implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
T___FILEBASENAME___::T___FILEBASENAME___(const std::string name)
: Module<___FILEBASENAME___Par>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> T___FILEBASENAME___::getInput(void)
{
std::vector<std::string> in;
return in;
}
std::vector<std::string> T___FILEBASENAME___::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void T___FILEBASENAME___::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
void T___FILEBASENAME___::execute(void)
{
}

View File

@ -0,0 +1,44 @@
#ifndef Hadrons____FILEBASENAME____hpp_
#define Hadrons____FILEBASENAME____hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* ___FILEBASENAME___ *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(___NAMESPACE___)
class ___FILEBASENAME___Par: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(___FILEBASENAME___Par,
unsigned int, i);
};
class T___FILEBASENAME___: public Module<___FILEBASENAME___Par>
{
public:
// constructor
T___FILEBASENAME___(const std::string name);
// destructor
virtual ~T___FILEBASENAME___(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(___FILEBASENAME___, T___FILEBASENAME___, ___NAMESPACE___);
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons____FILEBASENAME____hpp_

View File

@ -0,0 +1,81 @@
#ifndef Hadrons____FILEBASENAME____hpp_
#define Hadrons____FILEBASENAME____hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* ___FILEBASENAME___ *
******************************************************************************/
class ___FILEBASENAME___Par: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(___FILEBASENAME___Par,
unsigned int, i);
};
template <typename FImpl>
class T___FILEBASENAME___: public Module<___FILEBASENAME___Par>
{
public:
// constructor
T___FILEBASENAME___(const std::string name);
// destructor
virtual ~T___FILEBASENAME___(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER(___FILEBASENAME___, T___FILEBASENAME___<FIMPL>);
/******************************************************************************
* T___FILEBASENAME___ implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
T___FILEBASENAME___<FImpl>::T___FILEBASENAME___(const std::string name)
: Module<___FILEBASENAME___Par>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> T___FILEBASENAME___<FImpl>::getInput(void)
{
std::vector<std::string> in;
return in;
}
template <typename FImpl>
std::vector<std::string> T___FILEBASENAME___<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void T___FILEBASENAME___<FImpl>::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void T___FILEBASENAME___<FImpl>::execute(void)
{
}
END_HADRONS_NAMESPACE
#endif // Hadrons____FILEBASENAME____hpp_

View File

@ -0,0 +1,85 @@
#ifndef Hadrons____FILEBASENAME____hpp_
#define Hadrons____FILEBASENAME____hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* ___FILEBASENAME___ *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(___NAMESPACE___)
class ___FILEBASENAME___Par: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(___FILEBASENAME___Par,
unsigned int, i);
};
template <typename FImpl>
class T___FILEBASENAME___: public Module<___FILEBASENAME___Par>
{
public:
// constructor
T___FILEBASENAME___(const std::string name);
// destructor
virtual ~T___FILEBASENAME___(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(___FILEBASENAME___, T___FILEBASENAME___<FIMPL>, ___NAMESPACE___);
/******************************************************************************
* T___FILEBASENAME___ implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
T___FILEBASENAME___<FImpl>::T___FILEBASENAME___(const std::string name)
: Module<___FILEBASENAME___Par>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> T___FILEBASENAME___<FImpl>::getInput(void)
{
std::vector<std::string> in;
return in;
}
template <typename FImpl>
std::vector<std::string> T___FILEBASENAME___<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void T___FILEBASENAME___<FImpl>::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void T___FILEBASENAME___<FImpl>::execute(void)
{
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons____FILEBASENAME____hpp_

31
extras/Hadrons/add_module.sh Executable file
View File

@ -0,0 +1,31 @@
#!/usr/bin/env bash
if (( $# != 1 && $# != 2)); then
echo "usage: `basename $0` <module name> [<namespace>]" 1>&2
exit 1
fi
NAME=$1
NS=$2
if (( $# == 1 )); then
if [ -e "Modules/${NAME}.cc" ] || [ -e "Modules/${NAME}.hpp" ]; then
echo "error: files Modules/${NAME}.* already exists" 1>&2
exit 1
fi
sed "s/___FILEBASENAME___/${NAME}/g" Modules/templates/Module.cc.template > Modules/${NAME}.cc
sed "s/___FILEBASENAME___/${NAME}/g" Modules/templates/Module.hpp.template > Modules/${NAME}.hpp
elif (( $# == 2 )); then
mkdir -p Modules/${NS}
if [ -e "Modules/${NS}/${NAME}.cc" ] || [ -e "Modules/${NS}/${NAME}.hpp" ]; then
echo "error: files Modules/${NS}/${NAME}.* already exists" 1>&2
exit 1
fi
TMPCC=".${NS}.${NAME}.tmp.cc"
TMPHPP=".${NS}.${NAME}.tmp.hpp"
sed "s/___FILEBASENAME___/${NAME}/g" Modules/templates/Module_in_NS.cc.template > ${TMPCC}
sed "s/___FILEBASENAME___/${NAME}/g" Modules/templates/Module_in_NS.hpp.template > ${TMPHPP}
sed "s/___NAMESPACE___/${NS}/g" ${TMPCC} > Modules/${NS}/${NAME}.cc
sed "s/___NAMESPACE___/${NS}/g" ${TMPHPP} > Modules/${NS}/${NAME}.hpp
rm -f ${TMPCC} ${TMPHPP}
fi
./make_module_list.sh

View File

@ -0,0 +1,28 @@
#!/usr/bin/env bash
if (( $# != 1 && $# != 2)); then
echo "usage: `basename $0` <module name> [<namespace>]" 1>&2
exit 1
fi
NAME=$1
NS=$2
if (( $# == 1 )); then
if [ -e "Modules/${NAME}.cc" ] || [ -e "Modules/${NAME}.hpp" ]; then
echo "error: files Modules/${NAME}.* already exists" 1>&2
exit 1
fi
sed "s/___FILEBASENAME___/${NAME}/g" Modules/templates/Module_tmp.hpp.template > Modules/${NAME}.hpp
elif (( $# == 2 )); then
mkdir -p Modules/${NS}
if [ -e "Modules/${NS}/${NAME}.cc" ] || [ -e "Modules/${NS}/${NAME}.hpp" ]; then
echo "error: files Modules/${NS}/${NAME}.* already exists" 1>&2
exit 1
fi
TMPCC=".${NS}.${NAME}.tmp.cc"
TMPHPP=".${NS}.${NAME}.tmp.hpp"
sed "s/___FILEBASENAME___/${NAME}/g" Modules/templates/Module_tmp_in_NS.hpp.template > ${TMPHPP}
sed "s/___NAMESPACE___/${NS}/g" ${TMPHPP} > Modules/${NS}/${NAME}.hpp
rm -f ${TMPCC} ${TMPHPP}
fi
./make_module_list.sh

View File

@ -0,0 +1,12 @@
#!/usr/bin/env bash
echo 'modules_cc =\' > modules.inc
find Modules -name '*.cc' -type f -print | sed 's/^/ /;$q;s/$/ \\/' >> modules.inc
echo '' >> modules.inc
echo 'modules_hpp =\' >> modules.inc
find Modules -name '*.hpp' -type f -print | sed 's/^/ /;$q;s/$/ \\/' >> modules.inc
echo '' >> modules.inc
rm -f Modules.hpp
for f in `find Modules -name '*.hpp'`; do
echo "#include <Grid/Hadrons/${f}>" >> Modules.hpp
done

View File

@ -0,0 +1,19 @@
modules_cc =\
Modules/MGauge/Load.cc \
Modules/MGauge/Random.cc \
Modules/MGauge/Unit.cc
modules_hpp =\
Modules/MAction/DWF.hpp \
Modules/MAction/Wilson.hpp \
Modules/MContraction/Baryon.hpp \
Modules/MContraction/Meson.hpp \
Modules/MGauge/Load.hpp \
Modules/MGauge/Random.hpp \
Modules/MGauge/Unit.hpp \
Modules/MSolver/RBPrecCG.hpp \
Modules/MSource/Point.hpp \
Modules/MSource/SeqGamma.hpp \
Modules/MSource/Z2.hpp \
Modules/Quark.hpp

1
extras/Makefile.am Normal file
View File

@ -0,0 +1 @@
SUBDIRS = Hadrons

View File

@ -1,76 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/Bitwise.h
Copyright (C) 2016
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_BITWISE_H
#define GRID_BITWISE_H
#include <cassert>
#include <cfloat>
#include <bitset>
#include <climits>
#include <Config.h>
#ifdef GRID_DEFAULT_PRECISION_SINGLE
#define GRID_REAL_BYTES 4
#endif
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
#define GRID_REAL_BYTES 8
#endif
namespace Grid {
void show_binaryrep(const unsigned char* a, size_t size);
template <typename T>
void show_binaryrep(const T& a) {
const char* beg = reinterpret_cast<const char*>(&a);
const char* end = beg + sizeof(a);
unsigned int ctr = 0;
while (beg != end) {
std::cout << std::bitset<CHAR_BIT>(*beg++) << ' ';
ctr++;
if (ctr % GRID_REAL_BYTES == 0) std::cout << '\n';
}
std::cout << '\n';
}
template <typename T>
void bitwise_xor(T& l, T& r, unsigned char* xors) {
assert(sizeof(l) == sizeof(r));
unsigned char* org = reinterpret_cast<unsigned char*>(&l);
unsigned char* cur = reinterpret_cast<unsigned char*>(&r);
int words = sizeof(l) / sizeof(*org);
unsigned char result = 0;
for (int w = 0; w < words; w++) xors[w] = (org[w] ^ cur[w]);
}
}; // namespace
#endif

View File

@ -38,53 +38,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_H
#define GRID_H
///////////////////
// Std C++ dependencies
///////////////////
#include <cassert>
#include <complex>
#include <vector>
#include <iostream>
#include <iomanip>
#include <random>
#include <functional>
#include <stdio.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <ctime>
#include <sys/time.h>
#include <chrono>
///////////////////
// Grid headers
///////////////////
#include <Grid/serialisation/Serialisation.h>
#include "Config.h"
#include <Grid/Timer.h>
#include <Grid/Bitwise.h>
#include <Grid/PerfCount.h>
#include <Grid/Log.h>
#include <Grid/AlignedAllocator.h>
#include <Grid/Simd.h>
#include <Grid/Threads.h>
#include <Grid/Lexicographic.h>
#include <Grid/Init.h>
#include <Grid/Communicator.h>
#include <Grid/Cartesian.h>
#include <Grid/Tensors.h>
#include <Grid/Lattice.h>
#include <Grid/Cshift.h>
#include <Grid/Stencil.h>
#include <Grid/Algorithms.h>
#include <Grid/parallelIO/BinaryIO.h>
#include <Grid/FFT.h>
#include <Grid/qcd/QCD.h>
#include <Grid/parallelIO/NerscIO.h>
#include <Grid/qcd/hmc/NerscCheckpointer.h>
#include <Grid/qcd/hmc/HmcRunner.h>
#include <Grid/GridCore.h>
#include <Grid/GridQCDcore.h>
#include <Grid/qcd/action/Action.h>
#include <Grid/qcd/smearing/Smearing.h>
#include <Grid/qcd/hmc/HMC_aggregate.h>
#endif

81
lib/GridCore.h Normal file
View File

@ -0,0 +1,81 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/Grid.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: azusayamaguchi <ayamaguc@YAMAKAZE.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
//
// Grid.h
// simd
//
// Created by Peter Boyle on 09/05/2014.
// Copyright (c) 2014 University of Edinburgh. All rights reserved.
//
#ifndef GRID_BASE_H
#define GRID_BASE_H
///////////////////
// Std C++ dependencies
///////////////////
#include <cassert>
#include <complex>
#include <vector>
#include <iostream>
#include <iomanip>
#include <random>
#include <functional>
#include <stdio.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <ctime>
#include <sys/time.h>
#include <chrono>
///////////////////
// Grid headers
///////////////////
#include "Config.h"
#include <Grid/perfmon/Timer.h>
#include <Grid/perfmon/PerfCount.h>
#include <Grid/log/Log.h>
#include <Grid/allocator/AlignedAllocator.h>
#include <Grid/simd/Simd.h>
#include <Grid/serialisation/Serialisation.h>
#include <Grid/threads/Threads.h>
#include <Grid/util/Util.h>
#include <Grid/communicator/Communicator.h>
#include <Grid/cartesian/Cartesian.h>
#include <Grid/tensors/Tensors.h>
#include <Grid/lattice/Lattice.h>
#include <Grid/cshift/Cshift.h>
#include <Grid/stencil/Stencil.h>
#include <Grid/parallelIO/BinaryIO.h>
#include <Grid/algorithms/Algorithms.h>
#endif

View File

@ -2,12 +2,12 @@
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/hmc/HMC.cc
Source file: ./lib/Grid.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
Author: azusayamaguchi <ayamaguc@YAMAKAZE.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
@ -27,10 +27,16 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#ifndef GRID_QCD_CORE_H
#define GRID_QCD_CORE_H
namespace Grid{
namespace QCD{
/////////////////////////
// Core Grid QCD headers
/////////////////////////
#include <Grid/GridCore.h>
#include <Grid/qcd/QCD.h>
#include <Grid/qcd/spin/Spin.h>
#include <Grid/qcd/utils/Utils.h>
#include <Grid/qcd/representations/Representations.h>
}
}
#endif

1
lib/Hadrons Symbolic link
View File

@ -0,0 +1 @@
../extras/Hadrons

View File

@ -1,4 +1,5 @@
extra_sources=
extra_headers=
if BUILD_COMMS_MPI
extra_sources+=communicator/Communicator_mpi.cc
extra_sources+=communicator/Communicator_base.cc
@ -24,6 +25,12 @@ if BUILD_COMMS_NONE
extra_sources+=communicator/Communicator_base.cc
endif
if BUILD_HDF5
extra_sources+=serialisation/Hdf5IO.cc
extra_headers+=serialisation/Hdf5IO.h
extra_headers+=serialisation/Hdf5Type.h
endif
#
# Libraries
#
@ -32,6 +39,9 @@ include Eigen.inc
lib_LIBRARIES = libGrid.a
libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
CCFILES += $(extra_sources)
HFILES += $(extra_headers)
libGrid_a_SOURCES = $(CCFILES)
libGrid_adir = $(pkgincludedir)
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h

Binary file not shown.

View File

@ -1,154 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/Old/Tensor_peek.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MATH_PEEK_H
#define GRID_MATH_PEEK_H
namespace Grid {
//////////////////////////////////////////////////////////////////////////////
// Peek on a specific index; returns a scalar in that index, tensor inherits rest
//////////////////////////////////////////////////////////////////////////////
// If we hit the right index, return scalar with no further recursion
//template<int Level> inline ComplexF peekIndex(const ComplexF arg) { return arg;}
//template<int Level> inline ComplexD peekIndex(const ComplexD arg) { return arg;}
//template<int Level> inline RealF peekIndex(const RealF arg) { return arg;}
//template<int Level> inline RealD peekIndex(const RealD arg) { return arg;}
#if 0
// Scalar peek, no indices
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
auto peekIndex(const iScalar<vtype> &arg) -> iScalar<vtype>
{
return arg;
}
// Vector peek, one index
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
auto peekIndex(const iVector<vtype,N> &arg,int i) -> iScalar<vtype> // Index matches
{
iScalar<vtype> ret; // return scalar
ret._internal = arg._internal[i];
return ret;
}
// Matrix peek, two indices
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
auto peekIndex(const iMatrix<vtype,N> &arg,int i,int j) -> iScalar<vtype>
{
iScalar<vtype> ret; // return scalar
ret._internal = arg._internal[i][j];
return ret;
}
/////////////
// No match peek for scalar,vector,matrix must forward on either 0,1,2 args. Must have 9 routines with notvalue
/////////////
// scalar
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iScalar<vtype> &arg) -> iScalar<decltype(peekIndex<Level>(arg._internal))>
{
iScalar<decltype(peekIndex<Level>(arg._internal))> ret;
ret._internal= peekIndex<Level>(arg._internal);
return ret;
}
template<int Level,class vtype, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iScalar<vtype> &arg,int i) -> iScalar<decltype(peekIndex<Level>(arg._internal,i))>
{
iScalar<decltype(peekIndex<Level>(arg._internal,i))> ret;
ret._internal=peekIndex<Level>(arg._internal,i);
return ret;
}
template<int Level,class vtype, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iScalar<vtype> &arg,int i,int j) -> iScalar<decltype(peekIndex<Level>(arg._internal,i,j))>
{
iScalar<decltype(peekIndex<Level>(arg._internal,i,j))> ret;
ret._internal=peekIndex<Level>(arg._internal,i,j);
return ret;
}
// vector
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iVector<vtype,N> &arg) -> iVector<decltype(peekIndex<Level>(arg._internal[0])),N>
{
iVector<decltype(peekIndex<Level>(arg._internal[0])),N> ret;
for(int ii=0;ii<N;ii++){
ret._internal[ii]=peekIndex<Level>(arg._internal[ii]);
}
return ret;
}
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iVector<vtype,N> &arg,int i) -> iVector<decltype(peekIndex<Level>(arg._internal[0],i)),N>
{
iVector<decltype(peekIndex<Level>(arg._internal[0],i)),N> ret;
for(int ii=0;ii<N;ii++){
ret._internal[ii]=peekIndex<Level>(arg._internal[ii],i);
}
return ret;
}
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iVector<vtype,N> &arg,int i,int j) -> iVector<decltype(peekIndex<Level>(arg._internal[0],i,j)),N>
{
iVector<decltype(peekIndex<Level>(arg._internal[0],i,j)),N> ret;
for(int ii=0;ii<N;ii++){
ret._internal[ii]=peekIndex<Level>(arg._internal[ii],i,j);
}
return ret;
}
// matrix
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iMatrix<vtype,N> &arg) -> iMatrix<decltype(peekIndex<Level>(arg._internal[0][0])),N>
{
iMatrix<decltype(peekIndex<Level>(arg._internal[0][0])),N> ret;
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
ret._internal[ii][jj]=peekIndex<Level>(arg._internal[ii][jj]);// Could avoid this because peeking a scalar is dumb
}}
return ret;
}
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iMatrix<vtype,N> &arg,int i) -> iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i)),N>
{
iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i)),N> ret;
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
ret._internal[ii][jj]=peekIndex<Level>(arg._internal[ii][jj],i);
}}
return ret;
}
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iMatrix<vtype,N> &arg,int i,int j) -> iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i,j)),N>
{
iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i,j)),N> ret;
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
ret._internal[ii][jj]=peekIndex<Level>(arg._internal[ii][jj],i,j);
}}
return ret;
}
#endif
}
#endif

View File

@ -1,127 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/Old/Tensor_poke.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MATH_POKE_H
#define GRID_MATH_POKE_H
namespace Grid {
//////////////////////////////////////////////////////////////////////////////
// Poke a specific index;
//////////////////////////////////////////////////////////////////////////////
#if 0
// Scalar poke
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
void pokeIndex(iScalar<vtype> &ret, const iScalar<vtype> &arg)
{
ret._internal = arg._internal;
}
// Vector poke, one index
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
void pokeIndex(iVector<vtype,N> &ret, const iScalar<vtype> &arg,int i)
{
ret._internal[i] = arg._internal;
}
//Matrix poke, two indices
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
void pokeIndex(iMatrix<vtype,N> &ret, const iScalar<vtype> &arg,int i,int j)
{
ret._internal[i][j] = arg._internal;
}
/////////////
// No match poke for scalar,vector,matrix must forward on either 0,1,2 args. Must have 9 routines with notvalue
/////////////
// scalar
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(peekIndex<Level>(ret._internal))> &arg)
{
pokeIndex<Level>(ret._internal,arg._internal);
}
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(peekIndex<Level>(ret._internal,0))> &arg, int i)
{
pokeIndex<Level>(ret._internal,arg._internal,i);
}
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(peekIndex<Level>(ret._internal,0,0))> &arg,int i,int j)
{
pokeIndex<Level>(ret._internal,arg._internal,i,j);
}
// Vector
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iVector<vtype,N> &ret, iVector<decltype(peekIndex<Level>(ret._internal)),N> &arg)
{
for(int ii=0;ii<N;ii++){
pokeIndex<Level>(ret._internal[ii],arg._internal[ii]);
}
}
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(peekIndex<Level>(ret._internal,0)),N> &arg,int i)
{
for(int ii=0;ii<N;ii++){
pokeIndex<Level>(ret._internal[ii],arg._internal[ii],i);
}
}
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(peekIndex<Level>(ret._internal,0,0)),N> &arg,int i,int j)
{
for(int ii=0;ii<N;ii++){
pokeIndex<Level>(ret._internal[ii],arg._internal[ii],i,j);
}
}
// Matrix
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(peekIndex<Level>(ret._internal)),N> &arg)
{
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
pokeIndex<Level>(ret._internal[ii][jj],arg._internal[ii][jj]);
}}
}
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(peekIndex<Level>(ret._internal,0)),N> &arg,int i)
{
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
pokeIndex<Level>(ret._internal[ii][jj],arg._internal[ii][jj],i);
}}
}
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(peekIndex<Level>(ret._internal,0,0)),N> &arg, int i,int j)
{
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
pokeIndex<Level>(ret._internal[ii][jj],arg._internal[ii][jj],i,j);
}}
}
#endif
}
#endif

View File

@ -42,15 +42,14 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/iterative/ConjugateResidual.h>
#include <Grid/algorithms/iterative/NormalEquations.h>
#include <Grid/algorithms/iterative/SchurRedBlack.h>
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
// Lanczos support
#include <Grid/algorithms/iterative/MatrixUtils.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/CoarsenedMatrix.h>
#include <Grid/algorithms/FFT.h>
// Eigen/lanczos
// EigCg

View File

@ -267,8 +267,7 @@ namespace Grid {
SimpleCompressor<siteVector> compressor;
Stencil.HaloExchange(in,compressor);
PARALLEL_FOR_LOOP
for(int ss=0;ss<Grid()->oSites();ss++){
parallel_for(int ss=0;ss<Grid()->oSites();ss++){
siteVector res = zero;
siteVector nbr;
int ptype;
@ -380,8 +379,7 @@ PARALLEL_FOR_LOOP
Subspace.ProjectToSubspace(oProj,oblock);
// blockProject(iProj,iblock,Subspace.subspace);
// blockProject(oProj,oblock,Subspace.subspace);
PARALLEL_FOR_LOOP
for(int ss=0;ss<Grid()->oSites();ss++){
parallel_for(int ss=0;ss<Grid()->oSites();ss++){
for(int j=0;j<nbasis;j++){
if( disp!= 0 ) {
A[p]._odata[ss](j,i) = oProj._odata[ss](j);
@ -427,7 +425,7 @@ PARALLEL_FOR_LOOP
A[p]=zero;
}
GridParallelRNG RNG(Grid()); RNG.SeedRandomDevice();
GridParallelRNG RNG(Grid()); RNG.SeedFixedIntegers(std::vector<int>({55,72,19,17,34}));
Lattice<iScalar<CComplex> > val(Grid()); random(RNG,val);
Complex one(1.0);

View File

@ -25,7 +25,7 @@ Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <Grid/GridCore.h>
namespace Grid {
double MultiShiftFunction::approx(double x)

View File

@ -20,7 +20,7 @@
#include<iomanip>
#include<cassert>
#include<algorithms/approx/Remez.h>
#include<Grid/algorithms/approx/Remez.h>
// Constructor
AlgRemez::AlgRemez(double lower, double upper, long precision)

View File

@ -9,7 +9,6 @@ Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -34,21 +33,6 @@ directory
namespace Grid {
struct CG_state {
bool do_repro;
std::vector<RealD> residuals;
CG_state() {reset();}
void reset(){
do_repro = false;
residuals.clear();
}
};
enum CGexec_mode{ Default, ReproducibilityTest };
/////////////////////////////////////////////////////////////
// Base classes for iterative processes based on operators
// single input vec, single output vec.
@ -61,30 +45,12 @@ class ConjugateGradient : public OperatorFunction<Field> {
// Defaults true.
RealD Tolerance;
Integer MaxIterations;
// Reproducibility controls
bool ReproTest;
CG_state CGState; //to check reproducibility by repeating the CG
ReproducibilityState<typename Field::vector_object> ReprTest; // for the inner proucts
// Constructor
ConjugateGradient(RealD tol, Integer maxit, CGexec_mode Mode = Default)
: Tolerance(tol),MaxIterations(maxit){
switch(Mode)
{
case Default :
ErrorOnNoConverge = true;
ReproTest = false;
case ReproducibilityTest :
ErrorOnNoConverge = false;
ReproTest = true;
}
};
void set_reproducibility_interval(unsigned int interval){
ReprTest.interval = interval;
}
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
: Tolerance(tol),
MaxIterations(maxit),
ErrorOnNoConverge(err_on_no_conv){};
void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
Field &psi) {
@ -96,37 +62,34 @@ class ConjugateGradient : public OperatorFunction<Field> {
Field p(src);
Field mmp(src);
Field r(src);
Field psi_start(psi);// save for the repro test
if (CGState.do_repro && ReproTest)
std::cout << GridLogMessage << "Starting reproducibility test, full check every "
<< ReprTest.interval << " calls" << std::endl;
if(!ReprTest.do_check)
ReprTest.reset();
ReprTest.enable_reprocheck=ReproTest;
// Initial residual computation & set up
RealD guess = norm2(psi, ReprTest);
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
Linop.HermOpAndNorm(psi, mmp, d, b);// eventually split this for the norm check
Linop.HermOpAndNorm(psi, mmp, d, b);
r = src - mmp;
p = r;
a = norm2(p, ReprTest);
a = norm2(p);
cp = a;
ssq = norm2(src, ReprTest);
ssq = norm2(src);
std::cout << GridLogIterative << "ConjugateGradient: guess " << guess << std::endl;
std::cout << GridLogIterative << "ConjugateGradient: src " << ssq << std::endl;
std::cout << GridLogIterative << "ConjugateGradient: mp " << d << std::endl;
std::cout << GridLogIterative << "ConjugateGradient: mmp " << b << std::endl;
std::cout << GridLogIterative << "ConjugateGradient: cp,r " << cp << std::endl;
std::cout << GridLogIterative << "ConjugateGradient: p " << a << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: guess " << guess << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: src " << ssq << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: mp " << d << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: mmp " << b << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: cp,r " << cp << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: p " << a << std::endl;
RealD rsq = Tolerance * Tolerance * ssq;
@ -146,10 +109,10 @@ class ConjugateGradient : public OperatorFunction<Field> {
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
c = cp;// old residual
c = cp;
MatrixTimer.Start();
Linop.HermOpAndNorm(p, mmp, d, qq);// mmp = Ap, d=pAp
Linop.HermOpAndNorm(p, mmp, d, qq);
MatrixTimer.Stop();
LinalgTimer.Start();
@ -157,31 +120,14 @@ class ConjugateGradient : public OperatorFunction<Field> {
// ComplexD dck = innerProduct(p,mmp);
a = c / d;
b_pred = a * (a * qq - d) / c;// a check
b_pred = a * (a * qq - d) / c;
axpy(r, -a, mmp, r);// new residual r = r_old - a * Ap
cp = norm2(r, ReprTest); // bookkeeping this norm
if (ReproTest && !CGState.do_repro) {
CGState.residuals.push_back(cp); // save residuals state
std::cout << GridLogIterative << "ReproTest: Saving state" << std::endl;
}
if (ReproTest && CGState.do_repro){
// check that the residual agrees with the previous run
std::cout << GridLogIterative << "ReproTest: Checking state k=" << k << std::endl;
if (cp != CGState.residuals[k-1]){
std::cout << GridLogMessage << "Failing reproducibility test";
std::cout << GridLogMessage << " at k=" << k << std::endl;
std::cout << GridLogMessage << "saved residual = " << CGState.residuals[k-1]
<< " cp = " << cp << std::endl;
exit(1); // exit after the first failure
}
}
cp = axpy_norm(r, -a, mmp, r);
b = cp / c;
// Fuse these loops ; should be really easy
psi = a * p + psi; // update solution
p = p * b + r; // update search direction
psi = a * p + psi;
p = p * b + r;
LinalgTimer.Stop();
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
@ -211,29 +157,14 @@ class ConjugateGradient : public OperatorFunction<Field> {
std::cout << std::endl;
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
if (! (CGState.do_repro && ReproTest)){
CGState.do_repro = true;
ReprTest.do_check = true;
ReprTest.reset_counter();
this->operator()(Linop, src, psi_start);// run the repro test
if (ReprTest.success)
std::cout << GridLogMessage << "Reproducibility test passed" << std::endl;
else{
std::cout << GridLogMessage << "Reproducibility test failed" << std::endl;
exit(1);
}
}
// Clear state
CGState.reset();
ReprTest.reset();
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
<< std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
};
}

View File

@ -35,6 +35,7 @@ namespace Grid {
class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
public:
RealD Tolerance;
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
Integer MaxInnerIterations;
Integer MaxOuterIterations;
GridBase* SinglePrecGrid; //Grid for single-precision fields
@ -42,12 +43,16 @@ namespace Grid {
LinearOperatorBase<FieldF> &Linop_f;
LinearOperatorBase<FieldD> &Linop_d;
Integer TotalInnerIterations; //Number of inner CG iterations
Integer TotalOuterIterations; //Number of restarts
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
LinearFunction<FieldF> *guesser;
MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) :
Linop_f(_Linop_f), Linop_d(_Linop_d),
Tolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
OuterLoopNormMult(100.), guesser(NULL){ };
void useGuesser(LinearFunction<FieldF> &g){
@ -55,6 +60,8 @@ namespace Grid {
}
void operator() (const FieldD &src_d_in, FieldD &sol_d){
TotalInnerIterations = 0;
GridStopWatch TotalTimer;
TotalTimer.Start();
@ -74,7 +81,7 @@ namespace Grid {
FieldD src_d(DoublePrecGrid);
src_d = src_d_in; //source for next inner iteration, computed from residual during operation
RealD inner_tol = Tolerance;
RealD inner_tol = InnerTolerance;
FieldF src_f(SinglePrecGrid);
src_f.checkerboard = cb;
@ -89,7 +96,9 @@ namespace Grid {
GridStopWatch PrecChangeTimer;
for(Integer outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count
for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
//Compute double precision rsd and also new RHS vector.
Linop_d.HermOp(sol_d, tmp_d);
RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector
@ -117,6 +126,7 @@ namespace Grid {
InnerCGtimer.Start();
CG_f(Linop_f, src_f, sol_f);
InnerCGtimer.Stop();
TotalInnerIterations += CG_f.IterationsToComplete;
//Convert sol back to double and add to double prec solution
PrecChangeTimer.Start();
@ -131,9 +141,11 @@ namespace Grid {
ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations);
CG_d(Linop_d, src_d_in, sol_d);
TotalFinalStepIterations = CG_d.IterationsToComplete;
TotalTimer.Stop();
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl;
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total time " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
}
};

View File

@ -31,11 +31,16 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <string.h> //memset
#ifdef USE_LAPACK
#ifdef USE_MKL
#include<mkl_lapack.h>
#else
void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
double *vl, double *vu, int *il, int *iu, double *abstol,
int *m, double *w, double *z, int *ldz, int *isuppz,
double *work, int *lwork, int *iwork, int *liwork,
int *info);
//#include <lapacke/lapacke.h>
#endif
#endif
#include "DenseMatrix.h"
#include "EigenSort.h"
@ -59,15 +64,16 @@ public:
int Nstop; // Number of evecs checked for convergence
int Nk; // Number of converged sought
int Np; // Np -- Number of spare vecs in krylov space
int Np; // Np -- Number of spare vecs in kryloc space
int Nm; // Nm -- total number of vectors
RealD OrthoTime;
RealD eresid;
SortEigen<Field> _sort;
// GridCartesian &_fgrid;
LinearOperatorBase<Field> &_Linop;
OperatorFunction<Field> &_poly;
@ -124,23 +130,23 @@ public:
GridBase *grid = evec[0]._grid;
Field w(grid);
std::cout << "RitzMatrix "<<std::endl;
std::cout<<GridLogMessage << "RitzMatrix "<<std::endl;
for(int i=0;i<k;i++){
_poly(_Linop,evec[i],w);
std::cout << "["<<i<<"] ";
std::cout<<GridLogMessage << "["<<i<<"] ";
for(int j=0;j<k;j++){
ComplexD in = innerProduct(evec[j],w);
if ( fabs((double)i-j)>1 ) {
if (abs(in) >1.0e-9 ) {
std::cout<<"oops"<<std::endl;
std::cout<<GridLogMessage<<"oops"<<std::endl;
abort();
} else
std::cout << " 0 ";
std::cout<<GridLogMessage << " 0 ";
} else {
std::cout << " "<<in<<" ";
std::cout<<GridLogMessage << " "<<in<<" ";
}
}
std::cout << std::endl;
std::cout<<GridLogMessage << std::endl;
}
}
@ -174,10 +180,10 @@ public:
RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
// 7. vk+1 := wk/βk+1
// std::cout << "alpha = " << zalph << " beta "<<beta<<std::endl;
std::cout<<GridLogMessage << "alpha = " << zalph << " beta "<<beta<<std::endl;
const RealD tiny = 1.0e-20;
if ( beta < tiny ) {
std::cout << " beta is tiny "<<beta<<std::endl;
std::cout<<GridLogMessage << " beta is tiny "<<beta<<std::endl;
}
lmd[k] = alph;
lme[k] = beta;
@ -253,6 +259,7 @@ public:
}
#ifdef USE_LAPACK
#define LAPACK_INT long long
void diagonalize_lapack(DenseVector<RealD>& lmd,
DenseVector<RealD>& lme,
int N1,
@ -262,7 +269,7 @@ public:
const int size = Nm;
// tevals.resize(size);
// tevecs.resize(size);
int NN = N1;
LAPACK_INT NN = N1;
double evals_tmp[NN];
double evec_tmp[NN][NN];
memset(evec_tmp[0],0,sizeof(double)*NN*NN);
@ -276,19 +283,19 @@ public:
if (i==j) evals_tmp[i] = lmd[i];
if (j==(i-1)) EE[j] = lme[j];
}
int evals_found;
int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ;
int liwork = 3+NN*10 ;
int iwork[liwork];
LAPACK_INT evals_found;
LAPACK_INT lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ;
LAPACK_INT liwork = 3+NN*10 ;
LAPACK_INT iwork[liwork];
double work[lwork];
int isuppz[2*NN];
LAPACK_INT isuppz[2*NN];
char jobz = 'V'; // calculate evals & evecs
char range = 'I'; // calculate all evals
// char range = 'A'; // calculate all evals
char uplo = 'U'; // refer to upper half of original matrix
char compz = 'I'; // Compute eigenvectors of tridiagonal matrix
int ifail[NN];
int info;
long long info;
// int total = QMP_get_number_of_nodes();
// int node = QMP_get_node_number();
// GridBase *grid = evec[0]._grid;
@ -296,14 +303,18 @@ public:
int node = grid->_processor;
int interval = (NN/total)+1;
double vl = 0.0, vu = 0.0;
int il = interval*node+1 , iu = interval*(node+1);
LAPACK_INT il = interval*node+1 , iu = interval*(node+1);
if (iu > NN) iu=NN;
double tol = 0.0;
if (1) {
memset(evals_tmp,0,sizeof(double)*NN);
if ( il <= NN){
printf("total=%d node=%d il=%d iu=%d\n",total,node,il,iu);
#ifdef USE_MKL
dstegr(&jobz, &range, &NN,
#else
LAPACK_dstegr(&jobz, &range, &NN,
#endif
(double*)DD, (double*)EE,
&vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A'
&tol, // tolerance
@ -335,6 +346,7 @@ public:
lmd [NN-1-i]=evals_tmp[i];
}
}
#undef LAPACK_INT
#endif
@ -365,12 +377,14 @@ public:
// diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid);
#endif
int Niter = 100*N1;
int Niter = 10000*N1;
int kmin = 1;
int kmax = N2;
// (this should be more sophisticated)
for(int iter=0; iter<Niter; ++iter){
for(int iter=0; ; ++iter){
if ( (iter+1)%(100*N1)==0)
std::cout<<GridLogMessage << "[QL method] Not converged - iteration "<<iter+1<<"\n";
// determination of 2x2 leading submatrix
RealD dsub = lmd[kmax-1]-lmd[kmax-2];
@ -399,11 +413,11 @@ public:
_sort.push(lmd3,N2);
_sort.push(lmd2,N2);
for(int k=0; k<N2; ++k){
if (fabs(lmd2[k] - lmd3[k]) >SMALL) std::cout <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl;
// if (fabs(lme2[k] - lme[k]) >SMALL) std::cout <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl;
if (fabs(lmd2[k] - lmd3[k]) >SMALL) std::cout<<GridLogMessage <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl;
// if (fabs(lme2[k] - lme[k]) >SMALL) std::cout<<GridLogMessage <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl;
}
for(int k=0; k<N1*N1; ++k){
// if (fabs(Qt2[k] - Qt[k]) >SMALL) std::cout <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl;
// if (fabs(Qt2[k] - Qt[k]) >SMALL) std::cout<<GridLogMessage <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl;
}
}
#endif
@ -418,7 +432,7 @@ public:
}
}
}
std::cout << "[QL method] Error - Too many iteration: "<<Niter<<"\n";
std::cout<<GridLogMessage << "[QL method] Error - Too many iteration: "<<Niter<<"\n";
abort();
}
@ -435,6 +449,7 @@ public:
DenseVector<Field>& evec,
int k)
{
double t0=-usecond()/1e6;
typedef typename Field::scalar_type MyComplex;
MyComplex ip;
@ -453,6 +468,8 @@ public:
w = w - ip * evec[j];
}
normalise(w);
t0+=usecond()/1e6;
OrthoTime +=t0;
}
void setUnit_Qt(int Nm, DenseVector<RealD> &Qt) {
@ -486,10 +503,10 @@ until convergence
GridBase *grid = evec[0]._grid;
assert(grid == src._grid);
std::cout << " -- Nk = " << Nk << " Np = "<< Np << std::endl;
std::cout << " -- Nm = " << Nm << std::endl;
std::cout << " -- size of eval = " << eval.size() << std::endl;
std::cout << " -- size of evec = " << evec.size() << std::endl;
std::cout<<GridLogMessage << " -- Nk = " << Nk << " Np = "<< Np << std::endl;
std::cout<<GridLogMessage << " -- Nm = " << Nm << std::endl;
std::cout<<GridLogMessage << " -- size of eval = " << eval.size() << std::endl;
std::cout<<GridLogMessage << " -- size of evec = " << evec.size() << std::endl;
assert(Nm == evec.size() && Nm == eval.size());
@ -500,6 +517,7 @@ until convergence
DenseVector<int> Iconv(Nm);
DenseVector<Field> B(Nm,grid); // waste of space replicating
// DenseVector<Field> Btemp(Nm,grid); // waste of space replicating
Field f(grid);
Field v(grid);
@ -515,35 +533,48 @@ until convergence
// (uniform vector) Why not src??
// evec[0] = 1.0;
evec[0] = src;
std:: cout <<"norm2(src)= " << norm2(src)<<std::endl;
std:: cout<<GridLogMessage <<"norm2(src)= " << norm2(src)<<std::endl;
// << src._grid << std::endl;
normalise(evec[0]);
std:: cout <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl;
std:: cout<<GridLogMessage <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl;
// << evec[0]._grid << std::endl;
// Initial Nk steps
OrthoTime=0.;
double t0=usecond()/1e6;
for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
// std:: cout <<"norm2(evec[1])= " << norm2(evec[1]) << std::endl;
// std:: cout <<"norm2(evec[2])= " << norm2(evec[2]) << std::endl;
double t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::Initial steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
// std:: cout<<GridLogMessage <<"norm2(evec[1])= " << norm2(evec[1]) << std::endl;
// std:: cout<<GridLogMessage <<"norm2(evec[2])= " << norm2(evec[2]) << std::endl;
RitzMatrix(evec,Nk);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::RitzMatrix: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
for(int k=0; k<Nk; ++k){
// std:: cout <<"eval " << k << " " <<eval[k] << std::endl;
// std:: cout <<"lme " << k << " " << lme[k] << std::endl;
// std:: cout<<GridLogMessage <<"eval " << k << " " <<eval[k] << std::endl;
// std:: cout<<GridLogMessage <<"lme " << k << " " << lme[k] << std::endl;
}
// Restarting loop begins
for(int iter = 0; iter<Niter; ++iter){
std::cout<<"\n Restart iteration = "<< iter << std::endl;
std::cout<<GridLogMessage<<"\n Restart iteration = "<< iter << std::endl;
//
// Rudy does a sort first which looks very different. Getting fed up with sorting out the algo defs.
// We loop over
//
OrthoTime=0.;
for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL:: "<<Np <<" steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
f *= lme[Nm-1];
RitzMatrix(evec,k2);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL:: RitzMatrix: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
// getting eigenvalues
for(int k=0; k<Nm; ++k){
@ -552,18 +583,27 @@ until convergence
}
setUnit_Qt(Nm,Qt);
diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL:: diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
// sorting
_sort.push(eval2,Nm);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL:: eval sorting: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
// Implicitly shifted QR transformations
setUnit_Qt(Nm,Qt);
for(int ip=0; ip<k2; ++ip){
std::cout<<GridLogMessage << "eval "<< ip << " "<< eval2[ip] << std::endl;
}
for(int ip=k2; ip<Nm; ++ip){
std::cout << "qr_decomp "<< ip << " "<< eval2[ip] << std::endl;
std::cout<<GridLogMessage << "qr_decomp "<< ip << " "<< eval2[ip] << std::endl;
qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
}
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::qr_decomp: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
if (0) {
for(int i=0; i<(Nk+1); ++i) B[i] = 0.0;
for(int j=k1-1; j<k2+1; ++j){
@ -572,14 +612,38 @@ until convergence
B[j] += Qt[k+Nm*j] * evec[k];
}
}
for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j];
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::QR Rotate: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
}
if (1) {
for(int i=0; i<(Nk+1); ++i) {
B[i] = 0.0;
B[i].checkerboard = evec[0].checkerboard;
}
int j_block = 24; int k_block=24;
PARALLEL_FOR_LOOP
for(int ss=0;ss < grid->oSites();ss++){
for(int jj=k1-1; jj<k2+1; jj += j_block)
for(int kk=0; kk<Nm; kk += k_block)
for(int j=jj; (j<(k2+1)) && j<(jj+j_block); ++j){
for(int k=kk; (k<Nm) && k<(kk+k_block) ; ++k){
B[j]._odata[ss] +=Qt[k+Nm*j] * evec[k]._odata[ss];
}
}
}
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::QR rotation: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
}
for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j];
// Compressed vector f and beta(k2)
f *= Qt[Nm-1+Nm*(k2-1)];
f += lme[k2-1] * evec[k2];
beta_k = norm2(f);
beta_k = sqrt(beta_k);
std::cout<<" beta(k) = "<<beta_k<<std::endl;
std::cout<<GridLogMessage<<" beta(k) = "<<beta_k<<std::endl;
RealD betar = 1.0/beta_k;
evec[k2] = betar * f;
@ -592,7 +656,10 @@ until convergence
}
setUnit_Qt(Nm,Qt);
diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
if (0) {
for(int k = 0; k<Nk; ++k) B[k]=0.0;
for(int j = 0; j<Nk; ++j){
@ -600,12 +667,34 @@ until convergence
B[j].checkerboard = evec[k].checkerboard;
B[j] += Qt[k+j*Nm] * evec[k];
}
// std::cout << "norm(B["<<j<<"])="<<norm2(B[j])<<std::endl;
std::cout<<GridLogMessage << "norm(B["<<j<<"])="<<norm2(B[j])<<std::endl;
}
// _sort.push(eval2,B,Nk);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::Convergence rotation: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
}
if (1) {
for(int i=0; i<(Nk+1); ++i) {
B[i] = 0.0;
B[i].checkerboard = evec[0].checkerboard;
}
int j_block = 24; int k_block=24;
PARALLEL_FOR_LOOP
for(int ss=0;ss < grid->oSites();ss++){
for(int jj=0; jj<Nk; jj += j_block)
for(int kk=0; kk<Nk; kk += k_block)
for(int j=jj; (j<Nk) && j<(jj+j_block); ++j){
for(int k=kk; (k<Nk) && k<(kk+k_block) ; ++k){
B[j]._odata[ss] +=Qt[k+Nm*j] * evec[k]._odata[ss];
}
}
}
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::convergence rotation : "<<t1-t0<< "seconds"<<std::endl; t0=t1;
}
Nconv = 0;
// std::cout << std::setiosflags(std::ios_base::scientific);
// std::cout<<GridLogMessage << std::setiosflags(std::ios_base::scientific);
for(int i=0; i<Nk; ++i){
// _poly(_Linop,B[i],v);
@ -613,14 +702,16 @@ until convergence
RealD vnum = real(innerProduct(B[i],v)); // HermOp.
RealD vden = norm2(B[i]);
RealD vv0 = norm2(v);
eval2[i] = vnum/vden;
v -= eval2[i]*B[i];
RealD vv = norm2(v);
std::cout.precision(13);
std::cout << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
std::cout << "eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i];
std::cout <<" |H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv<< std::endl;
std::cout<<GridLogMessage << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
std::cout<<"eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i];
std::cout<<"|H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv;
std::cout<<" "<< vnum/(sqrt(vden)*sqrt(vv0)) << std::endl;
// change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged
if((vv<eresid*eresid) && (i == Nconv) ){
@ -629,17 +720,19 @@ until convergence
}
} // i-loop end
// std::cout << std::resetiosflags(std::ios_base::scientific);
// std::cout<<GridLogMessage << std::resetiosflags(std::ios_base::scientific);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::convergence testing: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
std::cout<<" #modes converged: "<<Nconv<<std::endl;
std::cout<<GridLogMessage<<" #modes converged: "<<Nconv<<std::endl;
if( Nconv>=Nstop ){
goto converged;
}
} // end of iter loop
std::cout<<"\n NOT converged.\n";
std::cout<<GridLogMessage<<"\n NOT converged.\n";
abort();
converged:
@ -652,10 +745,10 @@ until convergence
}
_sort.push(eval,evec,Nconv);
std::cout << "\n Converged\n Summary :\n";
std::cout << " -- Iterations = "<< Nconv << "\n";
std::cout << " -- beta(k) = "<< beta_k << "\n";
std::cout << " -- Nconv = "<< Nconv << "\n";
std::cout<<GridLogMessage << "\n Converged\n Summary :\n";
std::cout<<GridLogMessage << " -- Iterations = "<< Nconv << "\n";
std::cout<<GridLogMessage << " -- beta(k) = "<< beta_k << "\n";
std::cout<<GridLogMessage << " -- Nconv = "<< Nconv << "\n";
}
/////////////////////////////////////////////////
@ -678,25 +771,25 @@ until convergence
}
}
std::cout<<"Lanczos_Factor start/end " <<start <<"/"<<end<<std::endl;
std::cout<<GridLogMessage<<"Lanczos_Factor start/end " <<start <<"/"<<end<<std::endl;
// Starting from scratch, bq[0] contains a random vector and |bq[0]| = 1
int first;
if(start == 0){
std::cout << "start == 0\n"; //TESTING
std::cout<<GridLogMessage << "start == 0\n"; //TESTING
_poly(_Linop,bq[0],bf);
alpha = real(innerProduct(bq[0],bf));//alpha = bq[0]^dag A bq[0]
std::cout << "alpha = " << alpha << std::endl;
std::cout<<GridLogMessage << "alpha = " << alpha << std::endl;
bf = bf - alpha * bq[0]; //bf = A bq[0] - alpha bq[0]
H[0][0]=alpha;
std::cout << "Set H(0,0) to " << H[0][0] << std::endl;
std::cout<<GridLogMessage << "Set H(0,0) to " << H[0][0] << std::endl;
first = 1;
@ -716,19 +809,19 @@ until convergence
beta = 0;sqbt = 0;
std::cout << "cont is true so setting beta to zero\n";
std::cout<<GridLogMessage << "cont is true so setting beta to zero\n";
} else {
beta = norm2(bf);
sqbt = sqrt(beta);
std::cout << "beta = " << beta << std::endl;
std::cout<<GridLogMessage << "beta = " << beta << std::endl;
}
for(int j=first;j<end;j++){
std::cout << "Factor j " << j <<std::endl;
std::cout<<GridLogMessage << "Factor j " << j <<std::endl;
if(cont){ // switches to factoring; understand start!=0 and initial bf value is right.
bq[j] = bf; cont = false;
@ -751,7 +844,7 @@ until convergence
beta = fnorm;
sqbt = sqrt(beta);
std::cout << "alpha = " << alpha << " fnorm = " << fnorm << '\n';
std::cout<<GridLogMessage << "alpha = " << alpha << " fnorm = " << fnorm << '\n';
///Iterative refinement of orthogonality V = [ bq[0] bq[1] ... bq[M] ]
int re = 0;
@ -786,8 +879,8 @@ until convergence
bck = sqrt( nmbex );
re++;
}
std::cout << "Iteratively refined orthogonality, changes alpha\n";
if(re > 1) std::cout << "orthagonality refined " << re << " times" <<std::endl;
std::cout<<GridLogMessage << "Iteratively refined orthogonality, changes alpha\n";
if(re > 1) std::cout<<GridLogMessage << "orthagonality refined " << re << " times" <<std::endl;
H[j][j]=alpha;
}
@ -802,11 +895,13 @@ until convergence
void ImplicitRestart(int TM, DenseVector<RealD> &evals, DenseVector<DenseVector<RealD> > &evecs, DenseVector<Field> &bq, Field &bf, int cont)
{
std::cout << "ImplicitRestart begin. Eigensort starting\n";
std::cout<<GridLogMessage << "ImplicitRestart begin. Eigensort starting\n";
DenseMatrix<RealD> H; Resize(H,Nm,Nm);
#ifndef USE_LAPACK
EigenSort(evals, evecs);
#endif
///Assign shifts
int K=Nk;
@ -829,15 +924,15 @@ until convergence
/// Shifted H defines a new K step Arnoldi factorization
RealD beta = H[ff][ff-1];
RealD sig = Q[TM - 1][ff - 1];
std::cout << "beta = " << beta << " sig = " << real(sig) <<std::endl;
std::cout<<GridLogMessage << "beta = " << beta << " sig = " << real(sig) <<std::endl;
std::cout << "TM = " << TM << " ";
std::cout << norm2(bq[0]) << " -- before" <<std::endl;
std::cout<<GridLogMessage << "TM = " << TM << " ";
std::cout<<GridLogMessage << norm2(bq[0]) << " -- before" <<std::endl;
/// q -> q Q
times_real(bq, Q, TM);
std::cout << norm2(bq[0]) << " -- after " << ff <<std::endl;
std::cout<<GridLogMessage << norm2(bq[0]) << " -- after " << ff <<std::endl;
bf = beta* bq[ff] + sig* bf;
/// Do the rest of the factorization
@ -861,7 +956,7 @@ until convergence
int ff = Lanczos_Factor(0, M, cont, bq,bf,H); // 0--M to begin with
if(ff < M) {
std::cout << "Krylov: aborting ff "<<ff <<" "<<M<<std::endl;
std::cout<<GridLogMessage << "Krylov: aborting ff "<<ff <<" "<<M<<std::endl;
abort(); // Why would this happen?
}
@ -870,7 +965,7 @@ until convergence
for(int it = 0; it < Niter && (converged < Nk); ++it) {
std::cout << "Krylov: Iteration --> " << it << std::endl;
std::cout<<GridLogMessage << "Krylov: Iteration --> " << it << std::endl;
int lock_num = lock ? converged : 0;
DenseVector<RealD> tevals(M - lock_num );
DenseMatrix<RealD> tevecs; Resize(tevecs,M - lock_num,M - lock_num);
@ -886,7 +981,7 @@ until convergence
Wilkinson<RealD>(H, evals, evecs, small);
// Check();
std::cout << "Done "<<std::endl;
std::cout<<GridLogMessage << "Done "<<std::endl;
}
@ -951,7 +1046,7 @@ until convergence
DenseVector<RealD> &tevals, DenseVector<DenseVector<RealD> > &tevecs,
int lock, int converged)
{
std::cout << "Converged " << converged << " so far." << std::endl;
std::cout<<GridLogMessage << "Converged " << converged << " so far." << std::endl;
int lock_num = lock ? converged : 0;
int M = Nm;
@ -966,7 +1061,9 @@ until convergence
RealD small=1.0e-16;
Wilkinson<RealD>(AH, tevals, tevecs, small);
#ifndef USE_LAPACK
EigenSort(tevals, tevecs);
#endif
RealD resid_nrm= norm2(bf);
@ -977,7 +1074,7 @@ until convergence
RealD diff = 0;
diff = abs( tevecs[i][Nm - 1 - lock_num] ) * resid_nrm;
std::cout << "residual estimate " << SS-1-i << " " << diff << " of (" << tevals[i] << ")" << std::endl;
std::cout<<GridLogMessage << "residual estimate " << SS-1-i << " " << diff << " of (" << tevals[i] << ")" << std::endl;
if(diff < converged) {
@ -993,13 +1090,13 @@ until convergence
lock_num++;
}
converged++;
std::cout << " converged on eval " << converged << " of " << Nk << std::endl;
std::cout<<GridLogMessage << " converged on eval " << converged << " of " << Nk << std::endl;
} else {
break;
}
}
#endif
std::cout << "Got " << converged << " so far " <<std::endl;
std::cout<<GridLogMessage << "Got " << converged << " so far " <<std::endl;
}
///Check
@ -1008,7 +1105,9 @@ until convergence
DenseVector<RealD> goodval(this->get);
#ifndef USE_LAPACK
EigenSort(evals,evecs);
#endif
int NM = Nm;
@ -1080,10 +1179,10 @@ say con = 2
**/
template<class T>
static void Lock(DenseMatrix<T> &H, // Hess mtx
DenseMatrix<T> &Q, // Lock Transform
T val, // value to be locked
int con, // number already locked
static void Lock(DenseMatrix<T> &H, ///Hess mtx
DenseMatrix<T> &Q, ///Lock Transform
T val, ///value to be locked
int con, ///number already locked
RealD small,
int dfg,
bool herm)

View File

@ -36,7 +36,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <iomanip>
#include <complex>
#include <typeinfo>
#include <Grid.h>
#include <Grid/Grid.h>
/** Sign function **/

View File

@ -141,5 +141,85 @@ namespace Grid {
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form a Red Black solver calling a Herm solver
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackDiagTwoSolve {
private:
OperatorFunction<Field> & _HermitianRBSolver;
int CBfactorise;
public:
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver) :
_HermitianRBSolver(HermitianRBSolver)
{
CBfactorise=0;
};
template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){
// FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
Field src_e(grid);
Field src_o(grid);
Field sol_e(grid);
Field sol_o(grid);
Field tmp(grid);
Field Mtmp(grid);
Field resid(fgrid);
pickCheckerboard(Even,src_e,in);
pickCheckerboard(Odd ,src_o,in);
pickCheckerboard(Even,sol_e,out);
pickCheckerboard(Odd ,sol_o,out);
/////////////////////////////////////////////////////
// src_o = Mdag * (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
// get the right MpcDag
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
//////////////////////////////////////////////////////////////
// Call the red-black solver
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
// _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
_HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
_Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
///////////////////////////////////////////////////
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
// Verify the unprec residual
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
}
};
}
#endif

View File

@ -0,0 +1,66 @@
#include <Grid/GridCore.h>
namespace Grid {
int PointerCache::victim;
PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
void *PointerCache::Insert(void *ptr,size_t bytes) {
if (bytes < 4096 ) return NULL;
#ifdef GRID_OMP
assert(omp_in_parallel()==0);
#endif
void * ret = NULL;
int v = -1;
for(int e=0;e<Ncache;e++) {
if ( Entries[e].valid==0 ) {
v=e;
break;
}
}
if ( v==-1 ) {
v=victim;
victim = (victim+1)%Ncache;
}
if ( Entries[v].valid ) {
ret = Entries[v].address;
Entries[v].valid = 0;
Entries[v].address = NULL;
Entries[v].bytes = 0;
}
Entries[v].address=ptr;
Entries[v].bytes =bytes;
Entries[v].valid =1;
return ret;
}
void *PointerCache::Lookup(size_t bytes) {
if (bytes < 4096 ) return NULL;
#ifdef _OPENMP
assert(omp_in_parallel()==0);
#endif
for(int e=0;e<Ncache;e++){
if ( Entries[e].valid && ( Entries[e].bytes == bytes ) ) {
Entries[e].valid = 0;
return Entries[e].address;
}
}
return NULL;
}
}

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -42,9 +42,32 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
namespace Grid {
class PointerCache {
private:
static const int Ncache=8;
static int victim;
typedef struct {
void *address;
size_t bytes;
int valid;
} PointerCacheEntry;
static PointerCacheEntry Entries[Ncache];
public:
static void *Insert(void *ptr,size_t bytes) ;
static void *Lookup(size_t bytes) ;
};
////////////////////////////////////////////////////////////////////
// A lattice of something, but assume the something is SIMDized.
////////////////////////////////////////////////////////////////////
template<typename _Tp>
class alignedAllocator {
public:
@ -66,27 +89,27 @@ public:
pointer allocate(size_type __n, const void* _p= 0)
{
size_type bytes = __n*sizeof(_Tp);
_Tp *ptr = (_Tp *) PointerCache::Lookup(bytes);
#ifdef HAVE_MM_MALLOC_H
_Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,128);
#else
_Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(128,bytes);
#endif
_Tp tmp;
#ifdef GRID_NUMA
#pragma omp parallel for schedule(static)
for(int i=0;i<__n;i++){
ptr[i]=tmp;
}
#endif
return ptr;
}
void deallocate(pointer __p, size_type) {
void deallocate(pointer __p, size_type __n) {
size_type bytes = __n * sizeof(_Tp);
pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes);
#ifdef HAVE_MM_MALLOC_H
_mm_free((void *)__p);
if ( __freeme ) _mm_free((void *)__freeme);
#else
free((void *)__p);
if ( __freeme ) free((void *)__freeme);
#endif
}
void construct(pointer __p, const _Tp& __val) { };

View File

@ -52,7 +52,7 @@ public:
// Physics Grid information.
std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes.
std::vector<int> _fdimensions;// Global dimensions of array prior to cb removal
std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal
std::vector<int> _gdimensions;// Global dimensions of array after cb removal
std::vector<int> _ldimensions;// local dimensions of array with processor images removed
std::vector<int> _rdimensions;// Reduced local dimensions with simd lane images and processor images removed
@ -77,7 +77,7 @@ public:
// GridCartesian / GridRedBlackCartesian
////////////////////////////////////////////////////////////////
virtual int CheckerBoarded(int dim)=0;
virtual int CheckerBoard(std::vector<int> &site)=0;
virtual int CheckerBoard(const std::vector<int> &site)=0;
virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0;
@ -121,7 +121,6 @@ public:
Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions);
}
//////////////////////////////////////////////////////////
// SIMD lane addressing
//////////////////////////////////////////////////////////
@ -178,9 +177,11 @@ public:
// Global addressing
////////////////////////////////////////////////////////////////
void GlobalIndexToGlobalCoor(int gidx,std::vector<int> &gcoor){
assert(gidx< gSites());
Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions);
}
void LocalIndexToLocalCoor(int lidx,std::vector<int> &lcoor){
assert(lidx<lSites());
Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
}
void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){
@ -207,16 +208,16 @@ public:
std::vector<int> lcoor;
GlobalCoorToProcessorCoorLocalCoor(pcoor,lcoor,gcoor);
rank = RankFromProcessorCoor(pcoor);
/*
std::vector<int> cblcoor(lcoor);
for(int d=0;d<cblcoor.size();d++){
if( this->CheckerBoarded(d) ) {
cblcoor[d] = lcoor[d]/2;
}
}
i_idx= iIndex(cblcoor);// this does not imply divide by 2 on checker dim
o_idx= oIndex(lcoor); // this implies divide by 2 on checkerdim
*/
i_idx= iIndex(lcoor);
o_idx= oIndex(lcoor);
}
void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor)

View File

@ -49,7 +49,7 @@ public:
virtual int CheckerBoarded(int dim){
return 0;
}
virtual int CheckerBoard(std::vector<int> &site){
virtual int CheckerBoard(const std::vector<int> &site){
return 0;
}
virtual int CheckerBoardDestination(int cb,int shift,int dim){

View File

@ -49,7 +49,7 @@ public:
if( dim==_checker_dim) return 1;
else return 0;
}
virtual int CheckerBoard(std::vector<int> &site){
virtual int CheckerBoard(const std::vector<int> &site){
int linear=0;
assert(site.size()==_ndimension);
for(int d=0;d<_ndimension;d++){

View File

@ -25,7 +25,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include "Grid.h"
#include <Grid/GridCore.h>
namespace Grid {
///////////////////////////////////////////////////////////////
@ -33,6 +34,7 @@ namespace Grid {
///////////////////////////////////////////////////////////////
void * CartesianCommunicator::ShmCommBuf;
uint64_t CartesianCommunicator::MAX_MPI_SHM_BYTES = 128*1024*1024;
CartesianCommunicator::CommunicatorPolicy_t CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent;
/////////////////////////////////
// Alloc, free shmem region
@ -65,7 +67,6 @@ const std::vector<int> & CartesianCommunicator::ThisProcessorCoor(void) { return
const std::vector<int> & CartesianCommunicator::ProcessorGrid(void) { return _processors; };
int CartesianCommunicator::ProcessorCount(void) { return _Nprocessors; };
////////////////////////////////////////////////////////////////////////////////
// very VERY rarely (Log, serial RNG) we need world without a grid
////////////////////////////////////////////////////////////////////////////////
@ -89,14 +90,17 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L)
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes)
int CartesianCommunicator::NodeCount(void) { return ProcessorCount();};
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes)
{
SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
return 2.0*bytes;
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall)
{

View File

@ -68,8 +68,6 @@ class CartesianCommunicator {
static MPI_Comm communicator_world;
MPI_Comm communicator;
typedef MPI_Request CommsRequest_t;
static char name[MPI_MAX_PROCESSOR_NAME]; // processing node physical name
static int length;
#else
typedef int CommsRequest_t;
#endif
@ -118,6 +116,12 @@ class CartesianCommunicator {
// Implemented in Communicator_base.C
/////////////////////////////////
static void * ShmCommBuf;
// Isend/Irecv/Wait, or Sendrecv blocking
enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential };
static CommunicatorPolicy_t CommunicatorPolicy;
static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; }
size_t heap_top;
size_t heap_bytes;
@ -150,8 +154,8 @@ class CartesianCommunicator {
const std::vector<int> & ThisProcessorCoor(void) ;
const std::vector<int> & ProcessorGrid(void) ;
int ProcessorCount(void) ;
int NodeCount(void) ;
void PrintRankInfo(void) ;
////////////////////////////////////////////////////////////////////////////////
// very VERY rarely (Log, serial RNG) we need world without a grid
////////////////////////////////////////////////////////////////////////////////
@ -203,7 +207,7 @@ class CartesianCommunicator {
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
void StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
double StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
void *recv,

View File

@ -25,7 +25,9 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include "Grid.h"
#include <Grid/GridCore.h>
#include <Grid/GridQCDcore.h>
#include <Grid/qcd/action/ActionCore.h>
#include <mpi.h>
namespace Grid {
@ -35,19 +37,19 @@ namespace Grid {
// Info that is setup once and indept of cartesian layout
///////////////////////////////////////////////////////////////////////////////////////////////////
MPI_Comm CartesianCommunicator::communicator_world;
char CartesianCommunicator::name[MPI_MAX_PROCESSOR_NAME]; // processing node physical name
int CartesianCommunicator::length;
// Should error check all MPI calls.
void CartesianCommunicator::Init(int *argc, char ***argv) {
int flag;
int provided;
MPI_Initialized(&flag); // needed to coexist with other libs apparently
if ( !flag ) {
MPI_Init(argc,argv);
MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
if ( provided != MPI_THREAD_MULTIPLE ) {
QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute;
}
}
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
MPI_Get_processor_name(name, &length);
ShmInitGeneric();
}
@ -156,24 +158,34 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
int from,
int bytes)
{
MPI_Request xrq;
MPI_Request rrq;
int rank = _processor;
int myrank = _processor;
int ierr;
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
MPI_Request xrq;
MPI_Request rrq;
list.push_back(xrq);
list.push_back(rrq);
ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
} else {
// Give the CPU to MPI immediately; can use threads to overlap optionally
ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
recv,bytes,MPI_CHAR,from, from,
communicator,MPI_STATUS_IGNORE);
assert(ierr==0);
}
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
int nreq=list.size();
std::vector<MPI_Status> status(nreq);
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
assert(ierr==0);
if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
int nreq=list.size();
std::vector<MPI_Status> status(nreq);
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
assert(ierr==0);
}
}
void CartesianCommunicator::Barrier(void)
@ -210,10 +222,5 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
assert(ierr==0);
}
void CartesianCommunicator::PrintRankInfo(){
std::cout << "Grid: Rank "<< _processor << " - Physical node name: " << name << std::endl;
}
}// end of namespace

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -25,9 +25,23 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include "Grid.h"
#include <Grid/GridCore.h>
#include <mpi.h>
#include <semaphore.h>
#include <fcntl.h>
#include <unistd.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/mman.h>
//#include <zlib.h>
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////
@ -50,6 +64,10 @@ std::vector<int> CartesianCommunicator::GroupRanks;
std::vector<int> CartesianCommunicator::MyGroup;
std::vector<void *> CartesianCommunicator::ShmCommBufs;
int CartesianCommunicator::NodeCount(void) { return GroupSize;};
#undef FORCE_COMMS
void *CartesianCommunicator::ShmBufferSelf(void)
{
return ShmCommBufs[ShmRank];
@ -57,6 +75,9 @@ void *CartesianCommunicator::ShmBufferSelf(void)
void *CartesianCommunicator::ShmBuffer(int rank)
{
int gpeer = GroupRanks[rank];
#ifdef FORCE_COMMS
return NULL;
#endif
if (gpeer == MPI_UNDEFINED){
return NULL;
} else {
@ -65,7 +86,13 @@ void *CartesianCommunicator::ShmBuffer(int rank)
}
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p)
{
static int count =0;
int gpeer = GroupRanks[rank];
assert(gpeer!=ShmRank); // never send to self
assert(rank!=WorldRank);// never send to self
#ifdef FORCE_COMMS
return NULL;
#endif
if (gpeer == MPI_UNDEFINED){
return NULL;
} else {
@ -76,16 +103,27 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p)
}
void CartesianCommunicator::Init(int *argc, char ***argv) {
int flag;
int provided;
// mtrace();
MPI_Initialized(&flag); // needed to coexist with other libs apparently
if ( !flag ) {
MPI_Init(argc,argv);
MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
assert (provided == MPI_THREAD_MULTIPLE);
}
Grid_quiesce_nodes();
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
MPI_Comm_rank(communicator_world,&WorldRank);
MPI_Comm_size(communicator_world,&WorldSize);
if ( WorldRank == 0 ) {
std::cout << GridLogMessage<< "Initialising MPI "<< WorldRank <<"/"<<WorldSize <<std::endl;
}
/////////////////////////////////////////////////////////////////////
// Split into groups that can share memory
/////////////////////////////////////////////////////////////////////
@ -131,7 +169,6 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
///////////////////////////////////////////////////////////////////
int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator_world);
assert(ierr==0);
///////////////////////////////////////////////////////////////////
// find the group leaders world rank
///////////////////////////////////////////////////////////////////
@ -141,7 +178,6 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
leaders_group[group++] = l;
}
}
///////////////////////////////////////////////////////////////////
// Identify the rank of the group in which I (and my leader) live
///////////////////////////////////////////////////////////////////
@ -152,39 +188,114 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
}
}
assert(GroupRank!=-1);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// allocate the shared window for our group
//////////////////////////////////////////////////////////////////////////////////////////////////////////
MPI_Barrier(ShmComm);
ShmCommBuf = 0;
ierr = MPI_Win_allocate_shared(MAX_MPI_SHM_BYTES,1,MPI_INFO_NULL,ShmComm,&ShmCommBuf,&ShmWindow);
assert(ierr==0);
// KNL hack -- force to numa-domain 1 in flat
#if 0
//#include <numaif.h>
for(uint64_t page=0;page<MAX_MPI_SHM_BYTES;page+=4096){
void *pages = (void *) ( page + ShmCommBuf );
int status;
int flags=MPOL_MF_MOVE_ALL;
int nodes=1; // numa domain == MCDRAM
unsigned long count=1;
ierr= move_pages(0,count, &pages,&nodes,&status,flags);
if (ierr && (page==0)) perror("numa relocate command failed");
}
#endif
MPI_Win_lock_all (MPI_MODE_NOCHECK, ShmWindow);
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Plan: allocate a fixed SHM region. Scratch that is just used via some scheme during stencil comms, with no allocate free.
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ShmCommBufs.resize(ShmSize);
for(int r=0;r<ShmSize;r++){
MPI_Aint sz;
int dsp_unit;
MPI_Win_shared_query (ShmWindow, r, &sz, &dsp_unit, &ShmCommBufs[r]);
#if 1
char shm_name [NAME_MAX];
if ( ShmRank == 0 ) {
for(int r=0;r<ShmSize;r++){
size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES;
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",GroupRank,r);
shm_unlink(shm_name);
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
if ( fd < 0 ) { perror("failed shm_open"); assert(0); }
ftruncate(fd, size);
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); }
assert(((uint64_t)ptr&0x3F)==0);
ShmCommBufs[r] =ptr;
}
}
MPI_Barrier(ShmComm);
if ( ShmRank != 0 ) {
for(int r=0;r<ShmSize;r++){
size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES ;
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",GroupRank,r);
int fd=shm_open(shm_name,O_RDWR,0666);
if ( fd<0 ) { perror("failed shm_open"); assert(0); }
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); }
assert(((uint64_t)ptr&0x3F)==0);
ShmCommBufs[r] =ptr;
}
}
#else
std::vector<int> shmids(ShmSize);
if ( ShmRank == 0 ) {
for(int r=0;r<ShmSize;r++){
size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES;
key_t key = 0x4545 + r;
if ((shmids[r]= shmget(key,size, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
int errsv = errno;
printf("Errno %d\n",errsv);
perror("shmget");
exit(1);
}
printf("shmid: 0x%x\n", shmids[r]);
}
}
MPI_Barrier(ShmComm);
MPI_Bcast(&shmids[0],ShmSize*sizeof(int),MPI_BYTE,0,ShmComm);
MPI_Barrier(ShmComm);
for(int r=0;r<ShmSize;r++){
ShmCommBufs[r] = (uint64_t *)shmat(shmids[r], NULL,0);
if (ShmCommBufs[r] == (uint64_t *)-1) {
perror("Shared memory attach failure");
shmctl(shmids[r], IPC_RMID, NULL);
exit(2);
}
printf("shmaddr: %p\n", ShmCommBufs[r]);
}
MPI_Barrier(ShmComm);
// Mark for clean up
for(int r=0;r<ShmSize;r++){
shmctl(shmids[r], IPC_RMID,(struct shmid_ds *)NULL);
}
MPI_Barrier(ShmComm);
#endif
ShmCommBuf = ShmCommBufs[ShmRank];
MPI_Barrier(ShmComm);
if ( ShmRank == 0 ) {
for(int r=0;r<ShmSize;r++){
uint64_t * check = (uint64_t *) ShmCommBufs[r];
check[0] = GroupRank;
check[1] = r;
check[2] = 0x5A5A5A;
}
}
MPI_Barrier(ShmComm);
for(int r=0;r<ShmSize;r++){
uint64_t * check = (uint64_t *) ShmCommBufs[r];
assert(check[0]==GroupRank);
assert(check[1]==r);
assert(check[2]==0x5A5A5A);
}
MPI_Barrier(ShmComm);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// Verbose for now
//////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -192,7 +303,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
std::cout<<GridLogMessage<< "Grid MPI-3 configuration: detected ";
std::cout<< WorldSize << " Ranks " ;
std::cout<< GroupSize << " Nodes " ;
std::cout<< ShmSize << " with ranks-per-node "<<std::endl;
std::cout<< " with "<< ShmSize << " ranks-per-node "<<std::endl;
std::cout<<GridLogMessage <<"Grid MPI-3 configuration: allocated shared memory region of size ";
std::cout<<std::hex << MAX_MPI_SHM_BYTES <<" ShmCommBuf address = "<<ShmCommBuf << std::dec<<std::endl;
@ -207,7 +318,6 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
if(g!=ShmSize-1) std::cout<<",";
else std::cout<<"}"<<std::endl;
}
}
for(int g=0;g<GroupSize;g++){
@ -216,23 +326,21 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
if ( (ShmRank == 0) && (GroupRank==g) ) {
std::cout<<MyGroup[r];
if(r<ShmSize-1) std::cout<<",";
else std::cout<<"}"<<std::endl;
else std::cout<<"}"<<std::endl<<std::flush;
}
MPI_Barrier(communicator_world);
}
}
assert(ShmSetup==0); ShmSetup=1;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Want to implement some magic ... Group sub-cubes into those on same node
////////////////////////////////////////////////////////////////////////////////////////////////////////////
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &dest,int &source)
{
std::vector<int> coor = _processor_coor;
std::vector<int> coor = _processor_coor; // my coord
assert(std::abs(shift) <_processors[dim]);
coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim];
@ -242,28 +350,32 @@ void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest
coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim];
Lexicographic::IndexFromCoor(coor,dest,_processors);
dest = LexicographicToWorldRank[dest];
}
}// rank is world rank.
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
{
int rank;
Lexicographic::IndexFromCoor(coor,rank,_processors);
rank = LexicographicToWorldRank[rank];
return rank;
}
}// rank is world rank
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
{
Lexicographic::CoorFromIndex(coor,rank,_processors);
rank = LexicographicToWorldRank[rank];
int lr=-1;
for(int r=0;r<WorldSize;r++){// map world Rank to lexico and then to coor
if( LexicographicToWorldRank[r]==rank) lr = r;
}
assert(lr!=-1);
Lexicographic::CoorFromIndex(coor,lr,_processors);
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
int ierr;
communicator=communicator_world;
_ndimension = processors.size();
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
////////////////////////////////////////////////////////////////
@ -275,24 +387,22 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
}
}
assert(log2size != -1);
////////////////////////////////////////////////////////////////
// Identify subblock of ranks on node spreading across dims
// in a maximally symmetrical way
////////////////////////////////////////////////////////////////
int dim = 0;
std::vector<int> WorldDims = processors;
ShmDims.resize(_ndimension,1);
ShmDims.resize (_ndimension,1);
GroupDims.resize(_ndimension);
ShmCoor.resize(_ndimension);
ShmCoor.resize (_ndimension);
GroupCoor.resize(_ndimension);
WorldCoor.resize(_ndimension);
int dim = 0;
for(int l2=0;l2<log2size;l2++){
while ( WorldDims[dim] / ShmDims[dim] <= 1 ) dim=(dim+1)%_ndimension;
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%_ndimension;
ShmDims[dim]*=2;
dim=(dim+1)%_ndimension;
}
@ -304,6 +414,29 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
GroupDims[d] = WorldDims[d]/ShmDims[d];
}
////////////////////////////////////////////////////////////////
// Verbose
////////////////////////////////////////////////////////////////
#if 0
std::cout<< GridLogMessage << "MPI-3 usage "<<std::endl;
std::cout<< GridLogMessage << "SHM ";
for(int d=0;d<_ndimension;d++){
std::cout<< ShmDims[d] <<" ";
}
std::cout<< std::endl;
std::cout<< GridLogMessage << "Group ";
for(int d=0;d<_ndimension;d++){
std::cout<< GroupDims[d] <<" ";
}
std::cout<< std::endl;
std::cout<< GridLogMessage<<"World ";
for(int d=0;d<_ndimension;d++){
std::cout<< WorldDims[d] <<" ";
}
std::cout<< std::endl;
#endif
////////////////////////////////////////////////////////////////
// Check processor counts match
////////////////////////////////////////////////////////////////
@ -317,29 +450,57 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
////////////////////////////////////////////////////////////////
// Establish mapping between lexico physics coord and WorldRank
//
////////////////////////////////////////////////////////////////
LexicographicToWorldRank.resize(WorldSize,0);
Lexicographic::CoorFromIndex(GroupCoor,GroupRank,GroupDims);
Lexicographic::CoorFromIndex(ShmCoor,ShmRank,ShmDims);
for(int d=0;d<_ndimension;d++){
WorldCoor[d] = GroupCoor[d]*ShmDims[d]+ShmCoor[d];
}
_processor_coor = WorldCoor;
int lexico;
Lexicographic::IndexFromCoor(WorldCoor,lexico,WorldDims);
LexicographicToWorldRank[lexico]=WorldRank;
_processor = lexico;
_processor = WorldRank;
///////////////////////////////////////////////////////////////////
// global sum Lexico to World mapping
///////////////////////////////////////////////////////////////////
int lexico;
LexicographicToWorldRank.resize(WorldSize,0);
Lexicographic::IndexFromCoor(WorldCoor,lexico,WorldDims);
LexicographicToWorldRank[lexico] = WorldRank;
ierr=MPI_Allreduce(MPI_IN_PLACE,&LexicographicToWorldRank[0],WorldSize,MPI_INT,MPI_SUM,communicator);
assert(ierr==0);
};
for(int i=0;i<WorldSize;i++){
int wr = LexicographicToWorldRank[i];
// int wr = i;
std::vector<int> coor(_ndimension);
ProcessorCoorFromRank(wr,coor); // from world rank
int ck = RankFromProcessorCoor(coor);
assert(ck==wr);
if ( wr == WorldRank ) {
for(int j=0;j<coor.size();j++) {
assert(coor[j] == _processor_coor[j]);
}
}
/*
std::cout << GridLogMessage<< " Lexicographic "<<i;
std::cout << " MPI rank "<<wr;
std::cout << " Coor ";
for(int j=0;j<coor.size();j++) std::cout << coor[j];
std::cout<< std::endl;
*/
/////////////////////////////////////////////////////
// Check everyone agrees on everyone elses coords
/////////////////////////////////////////////////////
std::vector<int> mcoor = coor;
this->Broadcast(0,(void *)&mcoor[0],mcoor.size()*sizeof(int));
for(int d = 0 ; d< _ndimension; d++) {
assert(coor[d] == mcoor[d]);
}
}
};
void CartesianCommunicator::GlobalSum(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
assert(ierr==0);
@ -367,8 +528,6 @@ void CartesianCommunicator::GlobalSumVector(double *d,int N)
int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
assert(ierr==0);
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFrom(void *xmit,
int dest,
@ -377,10 +536,14 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
int bytes)
{
std::vector<CommsRequest_t> reqs(0);
// unsigned long xcrc = crc32(0L, Z_NULL, 0);
// unsigned long rcrc = crc32(0L, Z_NULL, 0);
// xcrc = crc32(xcrc,(unsigned char *)xmit,bytes);
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
SendToRecvFromComplete(reqs);
// rcrc = crc32(rcrc,(unsigned char *)recv,bytes);
// printf("proc %d SendToRecvFrom %d bytes %lx %lx\n",_processor,bytes,xcrc,rcrc);
}
void CartesianCommunicator::SendRecvPacket(void *xmit,
void *recv,
int sender,
@ -397,7 +560,6 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
}
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
@ -406,95 +568,29 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
int from,
int bytes)
{
#if 0
this->StencilBarrier();
MPI_Request xrq;
MPI_Request rrq;
static int sequence;
int myrank = _processor;
int ierr;
int tag;
int check;
assert(dest != _processor);
assert(from != _processor);
int gdest = GroupRanks[dest];
int gfrom = GroupRanks[from];
int gme = GroupRanks[_processor];
if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
MPI_Request xrq;
MPI_Request rrq;
sequence++;
char *from_ptr = (char *)ShmCommBufs[ShmRank];
int small = (bytes<MAX_MPI_SHM_BYTES);
typedef uint64_t T;
int words = bytes/sizeof(T);
assert(((size_t)bytes &(sizeof(T)-1))==0);
assert(gme == ShmRank);
if ( small && (gdest !=MPI_UNDEFINED) ) {
char *to_ptr = (char *)ShmCommBufs[gdest];
assert(gme != gdest);
T *ip = (T *)xmit;
T *op = (T *)to_ptr;
PARALLEL_FOR_LOOP
for(int w=0;w<words;w++) {
op[w]=ip[w];
}
bcopy(&_processor,&to_ptr[bytes],sizeof(_processor));
bcopy(& sequence,&to_ptr[bytes+4],sizeof(sequence));
} else {
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
assert(ierr==0);
list.push_back(xrq);
}
this->StencilBarrier();
if (small && (gfrom !=MPI_UNDEFINED) ) {
T *ip = (T *)from_ptr;
T *op = (T *)recv;
PARALLEL_FOR_LOOP
for(int w=0;w<words;w++) {
op[w]=ip[w];
}
bcopy(&from_ptr[bytes] ,&tag ,sizeof(tag));
bcopy(&from_ptr[bytes+4],&check,sizeof(check));
assert(check==sequence);
assert(tag==from);
} else {
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
list.push_back(rrq);
} else {
// Give the CPU to MPI immediately; can use threads to overlap optionally
ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
recv,bytes,MPI_CHAR,from, from,
communicator,MPI_STATUS_IGNORE);
assert(ierr==0);
}
this->StencilBarrier();
#else
MPI_Request xrq;
MPI_Request rrq;
int rank = _processor;
int ierr;
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
#endif
}
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
@ -505,57 +601,63 @@ void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_
MPI_Request rrq;
int ierr;
assert(dest != _processor);
assert(from != _processor);
int gdest = GroupRanks[dest];
int gfrom = GroupRanks[from];
int gme = GroupRanks[_processor];
assert(gme == ShmRank);
assert(dest != _processor);
assert(from != _processor);
assert(gme == ShmRank);
double off_node_bytes=0.0;
#ifdef FORCE_COMMS
gdest = MPI_UNDEFINED;
gfrom = MPI_UNDEFINED;
#endif
if ( gfrom ==MPI_UNDEFINED) {
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
list.push_back(rrq);
off_node_bytes+=bytes;
}
if ( gdest == MPI_UNDEFINED ) {
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
assert(ierr==0);
list.push_back(xrq);
}
if ( gfrom ==MPI_UNDEFINED) {
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
list.push_back(rrq);
off_node_bytes+=bytes;
}
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
this->StencilSendToRecvFromComplete(list);
}
return off_node_bytes;
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list)
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall)
{
SendToRecvFromComplete(list);
SendToRecvFromComplete(waitall);
}
void CartesianCommunicator::StencilBarrier(void)
{
MPI_Win_sync (ShmWindow);
MPI_Barrier (ShmComm);
MPI_Win_sync (ShmWindow);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
int nreq=list.size();
if (nreq==0) return;
std::vector<MPI_Status> status(nreq);
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
assert(ierr==0);
list.resize(0);
}
void CartesianCommunicator::Barrier(void)
{
int ierr = MPI_Barrier(communicator);
assert(ierr==0);
}
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
{
int ierr=MPI_Bcast(data,
@ -565,7 +667,11 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
communicator);
assert(ierr==0);
}
int CartesianCommunicator::RankWorld(void){
int r;
MPI_Comm_rank(communicator_world,&r);
return r;
}
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
{
int ierr= MPI_Bcast(data,
@ -576,10 +682,5 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
assert(ierr==0);
}
void CartesianCommunicator::PrintRankInfo(){
std::cout << "Grid: Rank "<< _processor << " - Physical node name: " << name << std::endl;
}
}

View File

@ -27,6 +27,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */
#include "Grid.h"
#include <mpi.h>
//#include <numaif.h>
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Workarounds:
@ -42,19 +43,27 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <fcntl.h>
#include <unistd.h>
#include <limits.h>
typedef sem_t *Grid_semaphore;
#error /*THis is deprecated*/
#if 0
#define SEM_INIT(S) S = sem_open(sem_name,0,0600,0); assert ( S != SEM_FAILED );
#define SEM_INIT_EXCL(S) sem_unlink(sem_name); S = sem_open(sem_name,O_CREAT|O_EXCL,0600,0); assert ( S != SEM_FAILED );
#define SEM_POST(S) assert ( sem_post(S) == 0 );
#define SEM_WAIT(S) assert ( sem_wait(S) == 0 );
#else
#define SEM_INIT(S) ;
#define SEM_INIT_EXCL(S) ;
#define SEM_POST(S) ;
#define SEM_WAIT(S) ;
#endif
#include <sys/mman.h>
namespace Grid {
enum { COMMAND_ISEND, COMMAND_IRECV, COMMAND_WAITALL };
enum { COMMAND_ISEND, COMMAND_IRECV, COMMAND_WAITALL, COMMAND_SENDRECV };
struct Descriptor {
uint64_t buf;
@ -62,6 +71,12 @@ struct Descriptor {
int rank;
int tag;
int command;
uint64_t xbuf;
uint64_t rbuf;
int xtag;
int rtag;
int src;
int dest;
MPI_Request request;
};
@ -94,18 +109,14 @@ public:
void SemInit(void) {
sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank);
// printf("SEM_NAME: %s \n",sem_name);
SEM_INIT(sem_head);
sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank);
// printf("SEM_NAME: %s \n",sem_name);
SEM_INIT(sem_tail);
}
void SemInitExcl(void) {
sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank);
// printf("SEM_INIT_EXCL: %s \n",sem_name);
SEM_INIT_EXCL(sem_head);
sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank);
// printf("SEM_INIT_EXCL: %s \n",sem_name);
SEM_INIT_EXCL(sem_tail);
}
void WakeUpDMA(void) {
@ -125,6 +136,13 @@ public:
while(1){
WaitForCommand();
// std::cout << "Getting command "<<std::endl;
#if 0
_mm_monitor((void *)&state->head,0,0);
int s=state->start;
if ( s != state->head ) {
_mm_mwait(0,0);
}
#endif
Event();
}
}
@ -132,6 +150,7 @@ public:
int Event (void) ;
uint64_t QueueCommand(int command,void *buf, int bytes, int hashtag, MPI_Comm comm,int u_rank) ;
void QueueSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) ;
void WaitAll() {
// std::cout << "Queueing WAIT command "<<std::endl;
@ -141,7 +160,7 @@ public:
// std::cout << "Waiting from semaphore "<<std::endl;
WaitForComplete();
// std::cout << "Checking FIFO is empty "<<std::endl;
assert ( state->tail == state->head );
while ( state->tail != state->head );
}
};
@ -196,6 +215,12 @@ public:
// std::cout << "Waking up DMA "<< slave<<std::endl;
};
static void QueueSendRecv(int slave,void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src)
{
Slaves[slave].QueueSendRecv(xbuf,rbuf,bytes,xtag,rtag,comm,dest,src);
Slaves[slave].WakeUpDMA();
}
static void QueueRecv(int slave, void *buf, int bytes, int tag, MPI_Comm comm,int rank) {
// std::cout<< " Queueing recv "<< bytes<< " slave "<< slave << " from comm "<<rank <<std::endl;
Slaves[slave].QueueCommand(COMMAND_IRECV,buf,bytes,tag,comm,rank);
@ -226,6 +251,28 @@ public:
return;
};
static void QueueRoundRobinSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) {
uint8_t * cxbuf = (uint8_t *) xbuf;
uint8_t * crbuf = (uint8_t *) rbuf;
static int rrp=0;
int procs = VerticalSize-1;
int myoff=0;
int mywork=bytes;
QueueSendRecv(rrp+1,&cxbuf[myoff],&crbuf[myoff],mywork,xtag,rtag,comm,dest,src);
rrp = rrp+1;
if ( rrp == (VerticalSize-1) ) rrp = 0;
}
static void QueueMultiplexedSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) {
uint8_t * cxbuf = (uint8_t *) xbuf;
uint8_t * crbuf = (uint8_t *) rbuf;
int mywork, myoff, procs;
procs = VerticalSize-1;
for(int s=0;s<procs;s++) {
GetWork(bytes,s,mywork,myoff,procs);
QueueSendRecv(s+1,&cxbuf[myoff],&crbuf[myoff],mywork,xtag,rtag,comm,dest,src);
}
};
static void QueueMultiplexedSend(void *buf, int bytes, int tag, MPI_Comm comm,int rank) {
uint8_t * cbuf = (uint8_t *) buf;
int mywork, myoff, procs;
@ -275,6 +322,7 @@ std::vector<void *> MPIoffloadEngine::VerticalShmBufs;
std::vector<std::vector<int> > MPIoffloadEngine::UniverseRanks;
std::vector<int> MPIoffloadEngine::UserCommunicatorToWorldRanks;
int CartesianCommunicator::NodeCount(void) { return HorizontalSize;};
int MPIoffloadEngine::ShmSetup = 0;
void MPIoffloadEngine::CommunicatorInit (MPI_Comm &communicator_world,
@ -370,12 +418,22 @@ void MPIoffloadEngine::CommunicatorInit (MPI_Comm &communicator_world,
ftruncate(fd, size);
VerticalShmBufs[r] = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if ( VerticalShmBufs[r] == MAP_FAILED ) {
perror("failed mmap");
assert(0);
}
/*
for(uint64_t page=0;page<size;page+=4096){
void *pages = (void *) ( page + (uint64_t)VerticalShmBufs[r] );
int status;
int flags=MPOL_MF_MOVE_ALL;
int nodes=1; // numa domain == MCDRAM
unsigned long count=1;
ierr= move_pages(0,count, &pages,&nodes,&status,flags);
if (ierr && (page==0)) perror("numa relocate command failed");
}
*/
uint64_t * check = (uint64_t *) VerticalShmBufs[r];
check[0] = WorldRank;
check[1] = r;
@ -404,7 +462,7 @@ void MPIoffloadEngine::CommunicatorInit (MPI_Comm &communicator_world,
uint64_t * check = (uint64_t *) VerticalShmBufs[r];
assert(check[0]== WorldRank);
assert(check[1]== r);
std::cerr<<"SHM "<<r<<" " <<VerticalShmBufs[r]<<std::endl;
// std::cerr<<"SHM "<<r<<" " <<VerticalShmBufs[r]<<std::endl;
}
}
#endif
@ -542,6 +600,8 @@ int Slave::Event (void) {
static int head_last;
static int start_last;
int ierr;
MPI_Status stat;
static int i=0;
////////////////////////////////////////////////////
// Try to advance the start pointers
@ -550,11 +610,6 @@ int Slave::Event (void) {
if ( s != state->head ) {
switch ( state->Descrs[s].command ) {
case COMMAND_ISEND:
/*
std::cout<< " Send "<<s << " ptr "<< state<<" "<< state->Descrs[s].buf<< "["<<state->Descrs[s].bytes<<"]"
<< " to " << state->Descrs[s].rank<< " tag" << state->Descrs[s].tag
<< " Comm " << MPIoffloadEngine::communicator_universe<< " me " <<universe_rank<< std::endl;
*/
ierr = MPI_Isend((void *)(state->Descrs[s].buf+base),
state->Descrs[s].bytes,
MPI_CHAR,
@ -568,11 +623,6 @@ int Slave::Event (void) {
break;
case COMMAND_IRECV:
/*
std::cout<< " Recv "<<s << " ptr "<< state<<" "<< state->Descrs[s].buf<< "["<<state->Descrs[s].bytes<<"]"
<< " from " << state->Descrs[s].rank<< " tag" << state->Descrs[s].tag
<< " Comm " << MPIoffloadEngine::communicator_universe<< " me "<< universe_rank<< std::endl;
*/
ierr=MPI_Irecv((void *)(state->Descrs[s].buf+base),
state->Descrs[s].bytes,
MPI_CHAR,
@ -588,10 +638,32 @@ int Slave::Event (void) {
return 1;
break;
case COMMAND_SENDRECV:
// fprintf(stderr,"Sendrecv ->%d %d : <-%d %d \n",state->Descrs[s].dest, state->Descrs[s].xtag+i*10,state->Descrs[s].src, state->Descrs[s].rtag+i*10);
ierr=MPI_Sendrecv((void *)(state->Descrs[s].xbuf+base), state->Descrs[s].bytes, MPI_CHAR, state->Descrs[s].dest, state->Descrs[s].xtag+i*10,
(void *)(state->Descrs[s].rbuf+base), state->Descrs[s].bytes, MPI_CHAR, state->Descrs[s].src , state->Descrs[s].rtag+i*10,
MPIoffloadEngine::communicator_universe,MPI_STATUS_IGNORE);
assert(ierr==0);
// fprintf(stderr,"Sendrecv done %d %d\n",ierr,i);
// MPI_Barrier(MPIoffloadEngine::HorizontalComm);
// fprintf(stderr,"Barrier\n");
i++;
state->start = PERI_PLUS(s);
return 1;
break;
case COMMAND_WAITALL:
for(int t=state->tail;t!=s; t=PERI_PLUS(t) ){
MPI_Wait((MPI_Request *)&state->Descrs[t].request,MPI_STATUS_IGNORE);
if ( state->Descrs[t].command != COMMAND_SENDRECV ) {
MPI_Wait((MPI_Request *)&state->Descrs[t].request,MPI_STATUS_IGNORE);
}
};
s=PERI_PLUS(s);
state->start = s;
@ -613,6 +685,45 @@ int Slave::Event (void) {
// External interaction with the queue
//////////////////////////////////////////////////////////////////////////////
void Slave::QueueSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src)
{
int head =state->head;
int next = PERI_PLUS(head);
// Set up descriptor
int worldrank;
int hashtag;
MPI_Comm communicator;
MPI_Request request;
uint64_t relative;
relative = (uint64_t)xbuf - base;
state->Descrs[head].xbuf = relative;
relative= (uint64_t)rbuf - base;
state->Descrs[head].rbuf = relative;
state->Descrs[head].bytes = bytes;
MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,xtag,comm,dest);
state->Descrs[head].dest = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank];
state->Descrs[head].xtag = hashtag;
MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,rtag,comm,src);
state->Descrs[head].src = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank];
state->Descrs[head].rtag = hashtag;
state->Descrs[head].command= COMMAND_SENDRECV;
// Block until FIFO has space
while( state->tail==next );
// Msync on weak order architectures
// Advance pointer
state->head = next;
};
uint64_t Slave::QueueCommand(int command,void *buf, int bytes, int tag, MPI_Comm comm,int commrank)
{
/////////////////////////////////////////
@ -812,19 +923,22 @@ void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_
assert( (recv_i >= shm) && (recv_i+bytes <= shm+MAX_MPI_SHM_BYTES) );
assert(from!=_processor);
assert(dest!=_processor);
MPIoffloadEngine::QueueMultiplexedSend(xmit,bytes,_processor,communicator,dest);
MPIoffloadEngine::QueueMultiplexedRecv(recv,bytes,from,communicator,from);
}
MPIoffloadEngine::QueueMultiplexedSendRecv(xmit,recv,bytes,_processor,from,communicator,dest,from);
//MPIoffloadEngine::QueueRoundRobinSendRecv(xmit,recv,bytes,_processor,from,communicator,dest,from);
//MPIoffloadEngine::QueueMultiplexedSend(xmit,bytes,_processor,communicator,dest);
//MPIoffloadEngine::QueueMultiplexedRecv(recv,bytes,from,communicator,from);
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
MPIoffloadEngine::WaitAll();
//this->Barrier();
}
void CartesianCommunicator::StencilBarrier(void)
{
}
void CartesianCommunicator::StencilBarrier(void) { }
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
@ -869,9 +983,6 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
return NULL;
}
void CartesianCommunicator::PrintRankInfo(){
std::cout << "Grid: Rank "<< _processor << " - Physical node name: " << name << std::endl;
}
};

View File

@ -25,7 +25,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include "Grid.h"
#include <Grid/GridCore.h>
namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////
@ -37,11 +38,6 @@ void CartesianCommunicator::Init(int *argc, char *** arv)
ShmInitGeneric();
}
void CartesianCommunicator::PrintRankInfo(){
std::cout << GridLogMessage << "No Rank Info available" << std::endl;
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
_processors = processors;
@ -65,10 +61,10 @@ void CartesianCommunicator::GlobalSum(uint64_t &){}
void CartesianCommunicator::GlobalSumVector(double *,int N){}
void CartesianCommunicator::SendRecvPacket(void *xmit,
void *recv,
int xmit_to_rank,
int recv_from_rank,
int bytes)
void *recv,
int xmit_to_rank,
int recv_from_rank,
int bytes)
{
assert(0);
}
@ -76,22 +72,23 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
// Basic Halo comms primitive -- should never call in single node
void CartesianCommunicator::SendToRecvFrom(void *xmit,
int dest,
void *recv,
int from,
int bytes)
int dest,
void *recv,
int from,
int bytes)
{
assert(0);
}
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
assert(0);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
assert(0);
@ -102,7 +99,7 @@ void CartesianCommunicator::Barrier(void){}
void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {}
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { }
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor) { return 0;}
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor){ coor = _processor_coor ;}
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor){ coor = _processor_coor; }
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
{
source =0;

View File

@ -25,8 +25,9 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include "Grid.h"
#include <Grid/Grid.h>
#include <mpp/shmem.h>
#include <array>
namespace Grid {
@ -51,7 +52,7 @@ typedef struct HandShake_t {
} HandShake;
std::array<long,_SHMEM_REDUCE_SYNC_SIZE> make_psync_init(void) {
array<long,_SHMEM_REDUCE_SYNC_SIZE> ret;
std::array<long,_SHMEM_REDUCE_SYNC_SIZE> ret;
ret.fill(SHMEM_SYNC_VALUE);
return ret;
}
@ -109,7 +110,7 @@ void CartesianCommunicator::GlobalSum(uint32_t &u){
source = u;
dest = 0;
shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all(); // necessary?
u = dest;
}
@ -125,7 +126,7 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){
source = u;
dest = 0;
shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all(); // necessary?
u = dest;
}
@ -137,7 +138,8 @@ void CartesianCommunicator::GlobalSum(float &f){
source = f;
dest =0.0;
shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all();
f = dest;
}
void CartesianCommunicator::GlobalSumVector(float *f,int N)
@ -148,14 +150,16 @@ void CartesianCommunicator::GlobalSumVector(float *f,int N)
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
if ( shmem_addr_accessible(f,_processor) ){
shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync);
shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all();
return;
}
for(int i=0;i<N;i++){
dest =0.0;
source = f[i];
shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all();
f[i] = dest;
}
}
@ -168,7 +172,8 @@ void CartesianCommunicator::GlobalSum(double &d)
source = d;
dest = 0;
shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all();
d = dest;
}
void CartesianCommunicator::GlobalSumVector(double *d,int N)
@ -180,14 +185,16 @@ void CartesianCommunicator::GlobalSumVector(double *d,int N)
if ( shmem_addr_accessible(d,_processor) ){
shmem_double_sum_to_all(d,d,N,0,0,_Nprocessors,llwrk,psync);
shmem_double_sum_to_all(d,d,N,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all();
return;
}
for(int i=0;i<N;i++){
source = d[i];
dest =0.0;
shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all();
d[i] = dest;
}
}
@ -282,11 +289,13 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
SHMEM_VET(recv);
// shmem_putmem_nb(recv,xmit,bytes,dest,NULL);
shmem_putmem(recv,xmit,bytes,dest);
if ( CommunicatorPolicy == CommunicatorPolicySequential ) shmem_barrier_all();
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
// shmem_quiet(); // I'm done
shmem_barrier_all();// He's done too
if( CommunicatorPolicy == CommunicatorPolicyConcurrent ) shmem_barrier_all();// He's done too
}
void CartesianCommunicator::Barrier(void)
{
@ -301,13 +310,13 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
int words = bytes/4;
if ( shmem_addr_accessible(data,_processor) ){
shmem_broadcast32(data,data,words,root,0,0,shmem_n_pes(),psync);
shmem_broadcast32(data,data,words,root,0,0,shmem_n_pes(),psync.data());
return;
}
for(int w=0;w<words;w++){
word = array[w];
shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync);
shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync.data());
if ( shmem_my_pe() != root ) {
array[w] = word;
}
@ -325,17 +334,17 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
for(int w=0;w<words;w++){
word = array[w];
shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync);
shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync.data());
if ( shmem_my_pe() != root ) {
array[w]= word;
}
shmem_barrier_all();
}
}
void CartesianCommunicator::PrintRankInfo(){
std::cout << GridLogMessage << "SHMEM: Rank Info not implemented yet" << std::endl;
}
int CartesianCommunicator::RankWorld(void){
return shmem_my_pe();
}
}

View File

@ -1,5 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -53,15 +52,13 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimen
cbmask = 0x3;
}
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
int stride=rhs._grid->_slice_stride[dimension];
if ( cbmask == 0x3 ) {
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
parallel_for_nest2(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*stride;
int bo = n*e2;
@ -74,14 +71,13 @@ PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*stride;
int ocb=1<<rhs._grid->CheckerBoardFromOindexTable(o+b);
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
if ( ocb &cbmask ) {
table.push_back(std::pair<int,int> (bo++,o+b));
}
}
}
PARALLEL_FOR_LOOP
for(int i=0;i<table.size();i++){
parallel_for(int i=0;i<table.size();i++){
buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]);
}
}
@ -105,29 +101,30 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
int n1=rhs._grid->_slice_stride[dimension];
int n2=rhs._grid->_slice_block[dimension];
if ( cbmask ==0x3){
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
parallel_for_nest2(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*n1;
int offset = b+n*n2;
int offset = b+n*e2;
cobj temp =compress(rhs._odata[so+o+b]);
extract<cobj>(temp,pointers,offset);
}
}
} else {
assert(0); //Fixme think this is buggy
for(int n=0;n<e1;n++){
// Case of SIMD split AND checker dim cannot currently be hit, except in
// Test_cshift_red_black code.
std::cout << " Dense packed buffer WARNING " <<std::endl;
parallel_for_nest2(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o=n*rhs._grid->_slice_stride[dimension];
int o=n*n1;
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
int offset = b+n*rhs._grid->_slice_block[dimension];
int offset = b+n*e2;
if ( ocb & cbmask ) {
cobj temp =compress(rhs._odata[so+o+b]);
@ -171,10 +168,10 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
int stride=rhs._grid->_slice_stride[dimension];
if ( cbmask ==0x3 ) {
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
parallel_for_nest2(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension];
int bo =n*rhs._grid->_slice_block[dimension];
@ -182,17 +179,21 @@ PARALLEL_NESTED_LOOP2
}
}
} else {
std::vector<std::pair<int,int> > table;
int bo=0;
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension];
int bo =n*rhs._grid->_slice_block[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb & cbmask ) {
rhs._odata[so+o+b]=buffer[bo++];
table.push_back(std::pair<int,int> (so+o+b,bo++));
}
}
}
parallel_for(int i=0;i<table.size();i++){
// std::cout << "Rcv"<< table[i].first << " " << table[i].second << " " <<buffer[table[i].second]<<std::endl;
rhs._odata[table[i].first]=buffer[table[i].second];
}
}
}
@ -213,8 +214,7 @@ PARALLEL_NESTED_LOOP2
int e2=rhs._grid->_slice_block[dimension];
if(cbmask ==0x3 ) {
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
parallel_for_nest2(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*rhs._grid->_slice_stride[dimension];
int offset = b+n*rhs._grid->_slice_block[dimension];
@ -222,7 +222,11 @@ PARALLEL_NESTED_LOOP2
}
}
} else {
assert(0); // think this is buggy FIXME
// Case of SIMD split AND checker dim cannot currently be hit, except in
// Test_cshift_red_black code.
// std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;// think this is buggy FIXME
std::cout<<" Unthreaded warning -- buffer is not densely packed ??"<<std::endl;
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*rhs._grid->_slice_stride[dimension];
@ -254,8 +258,7 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
int e2=rhs._grid->_slice_block[dimension];
int stride = rhs._grid->_slice_stride[dimension];
if(cbmask == 0x3 ){
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
parallel_for_nest2(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride+b;
@ -264,8 +267,7 @@ PARALLEL_NESTED_LOOP2
}
}
} else {
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
parallel_for_nest2(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride+b;
@ -295,8 +297,8 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block [dimension];
int stride = rhs._grid->_slice_stride[dimension];
PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
parallel_for_nest2(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride;
@ -338,8 +340,8 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
// Map to always positive shift modulo global full dimension.
shift = (shift+fd)%fd;
ret.checkerboard = grid->CheckerBoardDestination(rhs.checkerboard,shift,dimension);
// the permute type
ret.checkerboard = grid->CheckerBoardDestination(rhs.checkerboard,shift,dimension);
int permute_dim =grid->PermuteDim(dimension);
int permute_type=grid->PermuteType(dimension);
int permute_type_dist;
@ -348,7 +350,6 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
int o = 0;
int bo = x * grid->_ostride[dimension];
int cb= (cbmask==0x2)? Odd : Even;
int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
@ -361,9 +362,23 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
// wrap is whether sshift > rd.
// num is sshift mod rd.
//
// shift 7
//
// XoXo YcYc
// oXoX cYcY
// XoXo YcYc
// oXoX cYcY
//
// sshift --
//
// XX YY ; 3
// XX YY ; 0
// XX YY ; 3
// XX YY ; 0
//
int permute_slice=0;
if(permute_dim){
int wrap = sshift/rd;
int wrap = sshift/rd; wrap=wrap % ly;
int num = sshift%rd;
if ( x< rd-num ) permute_slice=wrap;
@ -375,7 +390,6 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
} else {
permute_type_dist = permute_type;
}
}
if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);

View File

@ -74,7 +74,6 @@ template<class vobj> void Cshift_comms(Lattice<vobj>& ret,const Lattice<vobj> &r
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
// std::cout << "Cshift_comms dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
if ( sshift[0] == sshift[1] ) {
// std::cout << "Single pass Cshift_comms" <<std::endl;
Cshift_comms(ret,rhs,dimension,shift,0x3);
@ -154,10 +153,14 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
(void *)&recv_buf[0],
recv_from_rank,
bytes);
// for(int i=0;i<words;i++){
// std::cout << "SendRecv ["<<i<<"] snd "<<send_buf[i]<<" rcv " << recv_buf[i] << " 0x" << cbmask<<std::endl;
// }
grid->Barrier();
/*
for(int i=0;i<send_buf.size();i++){
assert(recv_buf.size()==buffer_size);
assert(send_buf.size()==buffer_size);
std::cout << "SendRecv_Cshift_comms ["<<i<<" "<< dimension<<"] snd "<<send_buf[i]<<" rcv " << recv_buf[i] << " 0x" << cbmask<<std::endl;
}
*/
Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask);
}
}
@ -243,7 +246,14 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
(void *)&recv_buf_extract[i][0],
recv_from_rank,
bytes);
/*
for(int w=0;w<recv_buf_extract[i].size();w++){
assert(recv_buf_extract[i].size()==buffer_size);
assert(send_buf_extract[i].size()==buffer_size);
std::cout << "SendRecv_Cshift_comms ["<<w<<" "<< dimension<<"] recv "<<recv_buf_extract[i][w]<<" send " << send_buf_extract[nbr_lane][w] << cbmask<<std::endl;
}
*/
grid->Barrier();
rpointers[i] = &recv_buf_extract[i][0];
} else {
rpointers[i] = &send_buf_extract[nbr_lane][0];

View File

@ -39,8 +39,7 @@ namespace Grid {
ret.checkerboard = lhs.checkerboard;
conformable(ret,rhs);
conformable(lhs,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
mult(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
@ -56,8 +55,7 @@ PARALLEL_FOR_LOOP
ret.checkerboard = lhs.checkerboard;
conformable(ret,rhs);
conformable(lhs,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
mac(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
@ -73,8 +71,7 @@ PARALLEL_FOR_LOOP
ret.checkerboard = lhs.checkerboard;
conformable(ret,rhs);
conformable(lhs,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
sub(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
@ -89,8 +86,7 @@ PARALLEL_FOR_LOOP
ret.checkerboard = lhs.checkerboard;
conformable(ret,rhs);
conformable(lhs,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
add(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
@ -108,8 +104,7 @@ PARALLEL_FOR_LOOP
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
ret.checkerboard = lhs.checkerboard;
conformable(lhs,ret);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
obj1 tmp;
mult(&tmp,&lhs._odata[ss],&rhs);
vstream(ret._odata[ss],tmp);
@ -120,8 +115,7 @@ PARALLEL_FOR_LOOP
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
ret.checkerboard = lhs.checkerboard;
conformable(ret,lhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
obj1 tmp;
mac(&tmp,&lhs._odata[ss],&rhs);
vstream(ret._odata[ss],tmp);
@ -132,8 +126,7 @@ PARALLEL_FOR_LOOP
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
ret.checkerboard = lhs.checkerboard;
conformable(ret,lhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
sub(&tmp,&lhs._odata[ss],&rhs);
@ -147,8 +140,7 @@ PARALLEL_FOR_LOOP
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
ret.checkerboard = lhs.checkerboard;
conformable(lhs,ret);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
add(&tmp,&lhs._odata[ss],&rhs);
@ -166,8 +158,7 @@ PARALLEL_FOR_LOOP
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
ret.checkerboard = rhs.checkerboard;
conformable(ret,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
mult(&tmp,&lhs,&rhs._odata[ss]);
@ -182,8 +173,7 @@ PARALLEL_FOR_LOOP
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
ret.checkerboard = rhs.checkerboard;
conformable(ret,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
mac(&tmp,&lhs,&rhs._odata[ss]);
@ -198,8 +188,7 @@ PARALLEL_FOR_LOOP
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
ret.checkerboard = rhs.checkerboard;
conformable(ret,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
sub(&tmp,&lhs,&rhs._odata[ss]);
@ -213,8 +202,7 @@ PARALLEL_FOR_LOOP
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
ret.checkerboard = rhs.checkerboard;
conformable(ret,rhs);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
#ifdef STREAMING_STORES
obj1 tmp;
add(&tmp,&lhs,&rhs._odata[ss]);
@ -230,8 +218,7 @@ PARALLEL_FOR_LOOP
ret.checkerboard = x.checkerboard;
conformable(ret,x);
conformable(x,y);
PARALLEL_FOR_LOOP
for(int ss=0;ss<x._grid->oSites();ss++){
parallel_for(int ss=0;ss<x._grid->oSites();ss++){
#ifdef STREAMING_STORES
vobj tmp = a*x._odata[ss]+y._odata[ss];
vstream(ret._odata[ss],tmp);
@ -245,8 +232,7 @@ PARALLEL_FOR_LOOP
ret.checkerboard = x.checkerboard;
conformable(ret,x);
conformable(x,y);
PARALLEL_FOR_LOOP
for(int ss=0;ss<x._grid->oSites();ss++){
parallel_for(int ss=0;ss<x._grid->oSites();ss++){
#ifdef STREAMING_STORES
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
vstream(ret._odata[ss],tmp);

View File

@ -121,8 +121,7 @@ public:
assert( (cb==Odd) || (cb==Even));
checkerboard=cb;
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
#ifdef STREAMING_STORES
vobj tmp = eval(ss,expr);
vstream(_odata[ss] ,tmp);
@ -144,8 +143,7 @@ PARALLEL_FOR_LOOP
assert( (cb==Odd) || (cb==Even));
checkerboard=cb;
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
#ifdef STREAMING_STORES
vobj tmp = eval(ss,expr);
vstream(_odata[ss] ,tmp);
@ -167,8 +165,7 @@ PARALLEL_FOR_LOOP
assert( (cb==Odd) || (cb==Even));
checkerboard=cb;
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
#ifdef STREAMING_STORES
//vobj tmp = eval(ss,expr);
vstream(_odata[ss] ,eval(ss,expr));
@ -191,8 +188,7 @@ PARALLEL_FOR_LOOP
checkerboard=cb;
_odata.resize(_grid->oSites());
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
#ifdef STREAMING_STORES
vobj tmp = eval(ss,expr);
vstream(_odata[ss] ,tmp);
@ -213,8 +209,7 @@ PARALLEL_FOR_LOOP
checkerboard=cb;
_odata.resize(_grid->oSites());
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
#ifdef STREAMING_STORES
vobj tmp = eval(ss,expr);
vstream(_odata[ss] ,tmp);
@ -235,8 +230,7 @@ PARALLEL_FOR_LOOP
checkerboard=cb;
_odata.resize(_grid->oSites());
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
vstream(_odata[ss] ,eval(ss,expr));
}
};
@ -258,8 +252,7 @@ PARALLEL_FOR_LOOP
_grid = r._grid;
checkerboard = r.checkerboard;
_odata.resize(_grid->oSites());// essential
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
_odata[ss]=r._odata[ss];
}
}
@ -269,8 +262,7 @@ PARALLEL_FOR_LOOP
virtual ~Lattice(void) = default;
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r;
}
return *this;
@ -279,8 +271,7 @@ PARALLEL_FOR_LOOP
this->checkerboard = r.checkerboard;
conformable(*this,r);
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r._odata[ss];
}
return *this;

View File

@ -45,90 +45,87 @@ namespace Grid {
//////////////////////////////////////////////////////////////////////////
template<class vfunctor,class lobj,class robj>
inline Lattice<vInteger> LLComparison(vfunctor op,const Lattice<lobj> &lhs,const Lattice<robj> &rhs)
{
Lattice<vInteger> ret(rhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=op(lhs._odata[ss],rhs._odata[ss]);
}
return ret;
{
Lattice<vInteger> ret(rhs._grid);
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=op(lhs._odata[ss],rhs._odata[ss]);
}
return ret;
}
//////////////////////////////////////////////////////////////////////////
// compare lattice to scalar
//////////////////////////////////////////////////////////////////////////
template<class vfunctor,class lobj,class robj>
template<class vfunctor,class lobj,class robj>
inline Lattice<vInteger> LSComparison(vfunctor op,const Lattice<lobj> &lhs,const robj &rhs)
{
Lattice<vInteger> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites(); ss++){
ret._odata[ss]=op(lhs._odata[ss],rhs);
}
return ret;
{
Lattice<vInteger> ret(lhs._grid);
parallel_for(int ss=0;ss<lhs._grid->oSites(); ss++){
ret._odata[ss]=op(lhs._odata[ss],rhs);
}
return ret;
}
//////////////////////////////////////////////////////////////////////////
// compare scalar to lattice
//////////////////////////////////////////////////////////////////////////
template<class vfunctor,class lobj,class robj>
template<class vfunctor,class lobj,class robj>
inline Lattice<vInteger> SLComparison(vfunctor op,const lobj &lhs,const Lattice<robj> &rhs)
{
Lattice<vInteger> ret(rhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=op(lhs._odata[ss],rhs);
}
return ret;
{
Lattice<vInteger> ret(rhs._grid);
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=op(lhs._odata[ss],rhs);
}
return ret;
}
//////////////////////////////////////////////////////////////////////////
// Map to functors
//////////////////////////////////////////////////////////////////////////
// Less than
template<class lobj,class robj>
inline Lattice<vInteger> operator < (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
return LLComparison(vlt<lobj,robj>(),lhs,rhs);
}
template<class lobj,class robj>
inline Lattice<vInteger> operator < (const Lattice<lobj> & lhs, const robj & rhs) {
return LSComparison(vlt<lobj,robj>(),lhs,rhs);
}
template<class lobj,class robj>
inline Lattice<vInteger> operator < (const lobj & lhs, const Lattice<robj> & rhs) {
return SLComparison(vlt<lobj,robj>(),lhs,rhs);
}
// Less than equal
template<class lobj,class robj>
inline Lattice<vInteger> operator <= (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
return LLComparison(vle<lobj,robj>(),lhs,rhs);
}
template<class lobj,class robj>
inline Lattice<vInteger> operator <= (const Lattice<lobj> & lhs, const robj & rhs) {
return LSComparison(vle<lobj,robj>(),lhs,rhs);
}
template<class lobj,class robj>
inline Lattice<vInteger> operator <= (const lobj & lhs, const Lattice<robj> & rhs) {
return SLComparison(vle<lobj,robj>(),lhs,rhs);
}
// Greater than
template<class lobj,class robj>
inline Lattice<vInteger> operator > (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
return LLComparison(vgt<lobj,robj>(),lhs,rhs);
}
template<class lobj,class robj>
inline Lattice<vInteger> operator > (const Lattice<lobj> & lhs, const robj & rhs) {
return LSComparison(vgt<lobj,robj>(),lhs,rhs);
}
template<class lobj,class robj>
inline Lattice<vInteger> operator > (const lobj & lhs, const Lattice<robj> & rhs) {
// Less than
template<class lobj,class robj>
inline Lattice<vInteger> operator < (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
return LLComparison(vlt<lobj,robj>(),lhs,rhs);
}
template<class lobj,class robj>
inline Lattice<vInteger> operator < (const Lattice<lobj> & lhs, const robj & rhs) {
return LSComparison(vlt<lobj,robj>(),lhs,rhs);
}
template<class lobj,class robj>
inline Lattice<vInteger> operator < (const lobj & lhs, const Lattice<robj> & rhs) {
return SLComparison(vlt<lobj,robj>(),lhs,rhs);
}
// Less than equal
template<class lobj,class robj>
inline Lattice<vInteger> operator <= (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
return LLComparison(vle<lobj,robj>(),lhs,rhs);
}
template<class lobj,class robj>
inline Lattice<vInteger> operator <= (const Lattice<lobj> & lhs, const robj & rhs) {
return LSComparison(vle<lobj,robj>(),lhs,rhs);
}
template<class lobj,class robj>
inline Lattice<vInteger> operator <= (const lobj & lhs, const Lattice<robj> & rhs) {
return SLComparison(vle<lobj,robj>(),lhs,rhs);
}
// Greater than
template<class lobj,class robj>
inline Lattice<vInteger> operator > (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
return LLComparison(vgt<lobj,robj>(),lhs,rhs);
}
template<class lobj,class robj>
inline Lattice<vInteger> operator > (const Lattice<lobj> & lhs, const robj & rhs) {
return LSComparison(vgt<lobj,robj>(),lhs,rhs);
}
template<class lobj,class robj>
inline Lattice<vInteger> operator > (const lobj & lhs, const Lattice<robj> & rhs) {
return SLComparison(vgt<lobj,robj>(),lhs,rhs);
}
// Greater than equal
}
// Greater than equal
template<class lobj,class robj>
inline Lattice<vInteger> operator >= (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
inline Lattice<vInteger> operator >= (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
return LLComparison(vge<lobj,robj>(),lhs,rhs);
}
template<class lobj,class robj>
@ -136,38 +133,37 @@ PARALLEL_FOR_LOOP
return LSComparison(vge<lobj,robj>(),lhs,rhs);
}
template<class lobj,class robj>
inline Lattice<vInteger> operator >= (const lobj & lhs, const Lattice<robj> & rhs) {
inline Lattice<vInteger> operator >= (const lobj & lhs, const Lattice<robj> & rhs) {
return SLComparison(vge<lobj,robj>(),lhs,rhs);
}
// equal
template<class lobj,class robj>
inline Lattice<vInteger> operator == (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
inline Lattice<vInteger> operator == (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
return LLComparison(veq<lobj,robj>(),lhs,rhs);
}
template<class lobj,class robj>
inline Lattice<vInteger> operator == (const Lattice<lobj> & lhs, const robj & rhs) {
inline Lattice<vInteger> operator == (const Lattice<lobj> & lhs, const robj & rhs) {
return LSComparison(veq<lobj,robj>(),lhs,rhs);
}
template<class lobj,class robj>
inline Lattice<vInteger> operator == (const lobj & lhs, const Lattice<robj> & rhs) {
inline Lattice<vInteger> operator == (const lobj & lhs, const Lattice<robj> & rhs) {
return SLComparison(veq<lobj,robj>(),lhs,rhs);
}
// not equal
template<class lobj,class robj>
inline Lattice<vInteger> operator != (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
inline Lattice<vInteger> operator != (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
return LLComparison(vne<lobj,robj>(),lhs,rhs);
}
template<class lobj,class robj>
inline Lattice<vInteger> operator != (const Lattice<lobj> & lhs, const robj & rhs) {
inline Lattice<vInteger> operator != (const Lattice<lobj> & lhs, const robj & rhs) {
return LSComparison(vne<lobj,robj>(),lhs,rhs);
}
template<class lobj,class robj>
inline Lattice<vInteger> operator != (const lobj & lhs, const Lattice<robj> & rhs) {
inline Lattice<vInteger> operator != (const lobj & lhs, const Lattice<robj> & rhs) {
return SLComparison(vne<lobj,robj>(),lhs,rhs);
}
}
#endif

View File

@ -34,47 +34,42 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
namespace Grid {
/////////////////////////////////////////////////////
// Non site, reduced locally reduced routines
/////////////////////////////////////////////////////
// localNorm2,
template<class vobj>
/////////////////////////////////////////////////////
// Non site, reduced locally reduced routines
/////////////////////////////////////////////////////
// localNorm2,
template<class vobj>
inline auto localNorm2 (const Lattice<vobj> &rhs)-> Lattice<typename vobj::tensor_reduced>
{
Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=innerProduct(rhs._odata[ss],rhs._odata[ss]);
}
return ret;
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=innerProduct(rhs._odata[ss],rhs._odata[ss]);
}
return ret;
}
// localInnerProduct
template<class vobj>
// localInnerProduct
template<class vobj>
inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs) -> Lattice<typename vobj::tensor_reduced>
{
Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=innerProduct(lhs._odata[ss],rhs._odata[ss]);
}
return ret;
}
// outerProduct Scalar x Scalar -> Scalar
// Vector x Vector -> Matrix
template<class ll,class rr>
// outerProduct Scalar x Scalar -> Scalar
// Vector x Vector -> Matrix
template<class ll,class rr>
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))>
{
Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))> ret(rhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]);
}
return ret;
}
{
Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))> ret(rhs._grid);
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]);
}
return ret;
}
}
#endif

View File

@ -37,8 +37,7 @@ namespace Grid {
inline Lattice<vobj> operator -(const Lattice<vobj> &r)
{
Lattice<vobj> ret(r._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<r._grid->oSites();ss++){
parallel_for(int ss=0;ss<r._grid->oSites();ss++){
vstream(ret._odata[ss], -r._odata[ss]);
}
return ret;
@ -74,8 +73,7 @@ PARALLEL_FOR_LOOP
inline auto operator * (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
{
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
decltype(lhs*rhs._odata[0]) tmp=lhs*rhs._odata[ss];
vstream(ret._odata[ss],tmp);
// ret._odata[ss]=lhs*rhs._odata[ss];
@ -86,8 +84,7 @@ PARALLEL_FOR_LOOP
inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs+rhs._odata[0])>
{
Lattice<decltype(lhs+rhs._odata[0])> ret(rhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
decltype(lhs+rhs._odata[0]) tmp =lhs-rhs._odata[ss];
vstream(ret._odata[ss],tmp);
// ret._odata[ss]=lhs+rhs._odata[ss];
@ -98,11 +95,9 @@ PARALLEL_FOR_LOOP
inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs-rhs._odata[0])>
{
Lattice<decltype(lhs-rhs._odata[0])> ret(rhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
decltype(lhs-rhs._odata[0]) tmp=lhs-rhs._odata[ss];
vstream(ret._odata[ss],tmp);
// ret._odata[ss]=lhs-rhs._odata[ss];
}
return ret;
}
@ -110,8 +105,7 @@ PARALLEL_FOR_LOOP
inline auto operator * (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
{
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites(); ss++){
decltype(lhs._odata[0]*rhs) tmp =lhs._odata[ss]*rhs;
vstream(ret._odata[ss],tmp);
// ret._odata[ss]=lhs._odata[ss]*rhs;
@ -122,8 +116,7 @@ PARALLEL_FOR_LOOP
inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]+rhs)>
{
Lattice<decltype(lhs._odata[0]+rhs)> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
decltype(lhs._odata[0]+rhs) tmp=lhs._odata[ss]+rhs;
vstream(ret._odata[ss],tmp);
// ret._odata[ss]=lhs._odata[ss]+rhs;
@ -134,15 +127,12 @@ PARALLEL_FOR_LOOP
inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]-rhs)>
{
Lattice<decltype(lhs._odata[0]-rhs)> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<rhs._grid->oSites(); ss++){
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
decltype(lhs._odata[0]-rhs) tmp=lhs._odata[ss]-rhs;
vstream(ret._odata[ss],tmp);
// ret._odata[ss]=lhs._odata[ss]-rhs;
}
return ret;
}
}
#endif

View File

@ -44,22 +44,20 @@ namespace Grid {
{
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))> ret(lhs._grid);
ret.checkerboard=lhs.checkerboard;
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i);
}
return ret;
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i);
}
return ret;
};
template<int Index,class vobj>
auto PeekIndex(const Lattice<vobj> &lhs,int i,int j) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))>
auto PeekIndex(const Lattice<vobj> &lhs,int i,int j) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))>
{
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))> ret(lhs._grid);
ret.checkerboard=lhs.checkerboard;
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i,j);
}
return ret;
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i,j);
}
return ret;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -68,25 +66,23 @@ PARALLEL_FOR_LOOP
template<int Index,class vobj>
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0))> & rhs,int i)
{
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i);
}
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i);
}
}
template<int Index,class vobj>
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0,0))> & rhs,int i,int j)
{
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i,j);
}
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i,j);
}
}
//////////////////////////////////////////////////////
// Poke a scalar object into the SIMD array
//////////////////////////////////////////////////////
template<class vobj,class sobj>
void pokeSite(const sobj &s,Lattice<vobj> &l,std::vector<int> &site){
void pokeSite(const sobj &s,Lattice<vobj> &l,const std::vector<int> &site){
GridBase *grid=l._grid;
@ -120,7 +116,7 @@ PARALLEL_FOR_LOOP
// Peek a scalar object from the SIMD array
//////////////////////////////////////////////////////////
template<class vobj,class sobj>
void peekSite(sobj &s,const Lattice<vobj> &l,std::vector<int> &site){
void peekSite(sobj &s,const Lattice<vobj> &l,const std::vector<int> &site){
GridBase *grid=l._grid;
@ -131,9 +127,6 @@ PARALLEL_FOR_LOOP
assert( l.checkerboard == l._grid->CheckerBoard(site));
// FIXME
// assert( sizeof(sobj)*Nsimd == sizeof(vobj));
int rank,odx,idx;
grid->GlobalCoorToRankIndex(rank,odx,idx,site);

View File

@ -40,8 +40,7 @@ namespace Grid {
template<class vobj> inline Lattice<vobj> adj(const Lattice<vobj> &lhs){
Lattice<vobj> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = adj(lhs._odata[ss]);
}
return ret;
@ -49,13 +48,10 @@ PARALLEL_FOR_LOOP
template<class vobj> inline Lattice<vobj> conjugate(const Lattice<vobj> &lhs){
Lattice<vobj> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = conjugate(lhs._odata[ss]);
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = conjugate(lhs._odata[ss]);
}
return ret;
};
}
#endif

Some files were not shown because too many files have changed in this diff Show More