1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-13 12:47:05 +01:00

Compare commits

...

331 Commits

Author SHA1 Message Date
c4435e6beb Merge branch 'release/v0.7.0' 2017-05-12 01:15:59 +01:00
7a8f6af5f8 Drop verbose compiler predefine check 2017-05-11 12:48:40 +01:00
49a5d9bac7 Clang major, minor trailing underscore 2017-05-11 12:25:02 +01:00
2b3fdd4a58 Print CXX predefines 2017-05-11 12:05:50 +01:00
34502ec471 4.8 dropped as buggy. 2017-05-11 11:43:39 +01:00
8a43e88b4f Compiler check early in build 2017-05-11 11:43:06 +01:00
238df20370 Still working on the compiler compat checks 2017-05-11 11:30:14 +01:00
97a32a6145 Add 4.8 test 2017-05-11 11:24:21 +01:00
655492a443 Compiler detection 2017-05-11 11:21:11 +01:00
1cab06f6bd Compat checks for compilers 2017-05-11 10:20:24 +01:00
f8024c262b Update Eigen 2017-05-10 13:30:09 +01:00
4cc5f01f4a Small change in the readme about the intel compiler 2017-05-09 15:38:59 +01:00
9c12c37aaf Confirming the fix on the complex boundary conditions 2017-05-09 08:41:29 +01:00
806eaa0530 Adding back the IO tests in the list 2017-05-08 22:26:44 +01:00
01d0e54594 Merge branch 'release/v0.7.0' into develop 2017-05-08 22:02:51 +01:00
5aafa335fe Fixing JSON error for complex numbers 2017-05-08 21:56:44 +01:00
8ba0494485 Fixing JSON for complex numbers 2017-05-08 21:41:39 +01:00
d99d98d9fd Merge branch 'release/v0.7.0' of https://github.com/paboyle/Grid into release/v0.7.0 2017-05-08 15:08:20 -04:00
95a017a4ae Relax force constraints to pass in single precision. 2017-05-08 15:06:41 -04:00
92f92379e6 Adding olivers test version 2017-05-08 18:42:19 +01:00
529e78d43f Restart the v0.7.0 release 2017-05-08 18:20:04 +01:00
4ec746d262 Merge branch 'release/v0.7.0' into develop 2017-05-06 18:43:03 +01:00
51bf1501fc Merge branch 'release/v0.7.0' 2017-05-06 18:42:50 +01:00
66d819c054 More info on gcc bug 2017-05-06 18:42:11 +01:00
3f3686f869 formattign 2017-05-06 18:41:27 +01:00
26bb829f8c Formatting 2017-05-06 18:40:55 +01:00
67cb04fc66 README update 2017-05-06 18:39:54 +01:00
a40bd68aed Version update 2017-05-06 17:00:14 +01:00
36495e0fd2 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-06 16:39:27 +01:00
93f6c15772 Warning squash 2017-05-06 16:38:58 +01:00
cb93eeff21 Update README 2017-05-06 16:28:12 +01:00
c7cc7e6101 Fix 2017-05-06 16:10:09 +01:00
c349aa6511 DEFINE warning elimination 2017-05-06 16:08:35 +01:00
3bae0a2d5c Drop a gcc warning 2017-05-06 15:51:42 +01:00
c1c7566089 GCC bug work around in 5.0 through 6.2 inclusive. 2017-05-06 15:20:25 +01:00
2439999ec8 Warning elimination; drop to -O2 on G++ bad versions 2017-05-06 14:44:49 +01:00
1d96f662e3 Fixed 4d fermion gparity force. Put strong tests on make check force tests 2017-05-06 00:46:31 +01:00
41d1889941 trusty ubuntu 2017-05-05 21:25:35 +01:00
0c3981e0c3 Trying to force recent automake 2017-05-05 21:15:22 +01:00
c727bd4609 Trying to work around automake version 2017-05-05 21:00:00 +01:00
db23749b67 Adding travis to make check 2017-05-05 20:42:08 +01:00
751f2b9703 Better check and benchmark driving 2017-05-05 19:54:38 +01:00
697c0603ce SITMO I/O for NERSC working now bit repro 2017-05-05 16:54:44 +01:00
14bedebb11 Source pointed to 2017-05-05 16:17:27 +01:00
47b5c07ffb Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-05 14:27:02 +01:00
da86a2bf54 Merge branch 'feature/hmc_generalise' into develop 2017-05-05 14:23:02 +01:00
c1cb60a0b3 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-05 14:22:37 +01:00
5ed5b4bfbf Merge branch 'develop' into feature/hmc_generalise 2017-05-05 14:22:33 +01:00
de84aacdfd Fixing a configure error for the smearing tests 2017-05-05 13:59:10 +01:00
2888003765 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-05 13:02:24 +01:00
da06bf5b95 Zmobius force test added 2017-05-05 12:52:45 +01:00
20999c1370 Merge branch 'develop' into feature/hmc_generalise 2017-05-05 12:47:17 +01:00
33f0ed1a33 No compile fix 2017-05-05 11:04:30 +01:00
50be56433b Delete old and defunct tests 2017-05-04 23:41:16 +01:00
43924007db Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-04 19:53:41 +01:00
78ef10e60f Mobius force improvement 2017-05-04 19:53:21 +01:00
679ae98b14 Merge branch 'feature/better-external-library' into develop 2017-05-04 15:42:12 +01:00
90f6bc16bb No compile clang fix 2017-05-04 12:15:06 +01:00
9b5b639546 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-03 20:51:40 -04:00
945767c6d8 More info 2017-05-03 20:26:35 -04:00
422cdf4979 Some checks 2017-05-03 18:37:38 -04:00
38db174f3b Print statement 2017-05-03 18:25:26 -04:00
92e364a35f Better reporting in benchmark for MPI3 2017-05-03 15:43:36 -04:00
58299b8ba2 Git info separated from version in git-config 2017-05-02 20:04:41 +01:00
124bf4d829 git ref in config summary 2017-05-02 19:41:01 +01:00
e8e56b3414 Config summary saved in git-config 2017-05-02 19:40:47 +01:00
89c430136d grid-config program 2017-05-02 19:13:13 +01:00
ea9aef7baa New header for standard headers (was an issue with Remez.h and external compilation) 2017-05-02 18:26:11 +01:00
c9e9e8061d Merge branch 'feature/hadrons' into develop 2017-05-02 18:23:47 +01:00
453cf2a1c6 Moving the topological charge outside the HMC related routines 2017-05-02 14:40:12 +01:00
de7bbfa5f9 Adding ParameterFile option for the HMC 2017-05-02 12:16:16 +01:00
dda8d77c87 Merge branch 'feature/hadrons' into feature/rare_kaon 2017-05-01 17:50:57 +01:00
aa29f4346a Hadrons: weird bus error with recent macOS clang 2017-05-01 17:49:08 +01:00
86116dbed6 Adding boundary condition switch (compile time) for the Mobius HMC example 2017-05-01 16:33:11 +01:00
7bd31e3f7c Adding external file support in the Mobius example (JSON) 2017-05-01 16:30:24 +01:00
74f451715f Fix for Mac compilation on the size_t uint64_t types 2017-05-01 15:12:07 +01:00
655be8ed76 Adding tests for the mobius operator 2017-05-01 14:42:16 +01:00
4063238943 Adding HMC test file example for Mobius + smearing 2017-05-01 13:44:00 +01:00
3344788fa1 Merge branch 'develop' into feature/hmc_generalise 2017-05-01 12:13:56 +01:00
99220f6531 Fixes and better timing 2017-04-26 17:24:11 -04:00
2a6d093749 move the sudo: required to match locatoin on Guido's branch 2017-04-26 09:15:34 +01:00
c947947fad sudo required suggested by guido 2017-04-26 08:45:36 +01:00
f555b50547 Merge branch 'feature/half-prec-comms' into develop 2017-04-26 08:43:40 +01:00
738c1a11c2 longer nloop 2017-04-26 08:43:20 +01:00
f8797e1e3e bug fix. works now and great face performance 2017-04-26 03:14:02 -04:00
fd1eb7de13 Clean implementation of the exterior faces listing only those points on the boudary 2017-04-26 02:34:52 -04:00
2ce898efa3 Pretty code 2017-04-26 02:34:25 -04:00
ab66bac4e6 Think I'm getting on top of the reduced cost exterior precomputed list of links 2017-04-25 08:50:26 +01:00
56277a11c8 Build a list of whats on the surface 2017-04-24 17:06:15 +01:00
916e9e1d3e Merge branch 'feature/half-prec-comms' of https://github.com/paboyle/Grid into feature/half-prec-comms 2017-04-24 10:39:19 +01:00
5b55867a7a Slightly cheaper Ext assembly 2017-04-24 05:36:11 -04:00
3accb1ef89 Debugged assemply split phase with interior suppression 2017-04-23 19:30:19 -04:00
e3d0e31525 Debugged assemply split phase with interior suppression 2017-04-23 19:29:27 -04:00
5812eb8a8c Partially fixed. But the comms-overlap does not work yet. 2017-04-22 18:50:25 -04:00
4dd3763294 Use OMP as much as possible 2017-04-22 20:35:20 +01:00
c429ace748 Cleaner OpenMP use 2017-04-22 20:28:42 +01:00
ac58565d0a Dangerous rewrite of the assembly. If I make a mistake the debug will be painful. 2017-04-22 19:31:04 +01:00
3703b718aa Mark up a table if a given site only receives from itself; including MPI3 splitting info. 2017-04-22 19:28:37 +01:00
b722889234 Try a better load balancing loop 2017-04-22 19:27:41 +01:00
abba44a837 Hand unrolled for overlapped comms 2017-04-22 17:45:17 +01:00
f301be94ce Fixed 2017-04-22 17:42:31 +01:00
1d1b225497 Hand unrolled Nc=3 kernels support split phase compute (on-node, off-node). 2017-04-22 09:05:28 -04:00
53a785a3dd Fixing the KNL compile 2017-04-22 08:11:51 -04:00
736bf3c866 Major rework of stencil. Half precision and MPI3 now working. 2017-04-22 11:33:50 +01:00
b9bbe5d188 L1p config bg/q 2017-04-22 11:33:09 +01:00
3844bcf800 If no f16c instructions supported must use software half precision conversion.
This will also become useful on BG/Q, so will move out from SSE4 into a general area.
Lifted the Eigen half precision from web. Looks sensible, but not extensively regressed
against the intrinsics implementation yet.
2017-04-20 15:30:52 +01:00
e1a2319d01 Simple compressor moved out of cshift into stencil 2017-04-20 13:18:15 +01:00
180c732b4c Move compressors out of Cshift.
Slice iterators would help
2017-04-20 13:17:55 +01:00
957a706d0b Useful script 2017-04-20 13:17:44 +01:00
d2312e9874 Drop compressor entirely from Cshift to only Stencil. 2017-04-20 13:16:55 +01:00
fc4ab9ccd5 Working half precision comms 2017-04-20 11:20:26 +01:00
4a340aa5ca Massive compressor rework to support reduced precision comms 2017-04-20 09:28:27 +01:00
3b7de792d5 Type comparison in the traits work 2017-04-18 13:28:04 +01:00
557c3fa109 Pretty change 2017-04-18 13:27:38 +01:00
ec18e9f7f6 Merge branch 'develop' into feature/half-prec-comms 2017-04-18 11:39:39 +01:00
a839d5bc55 Updated todo list 2017-04-18 11:22:17 +01:00
de41b84c5c Merge branch 'feature/normHP' into develop 2017-04-18 10:57:21 +01:00
8e161152e4 MultiRHS solver improvements with slice operations moved into lattice and sped up.
Block solver requires a lot of performance work.
2017-04-18 10:51:55 +01:00
3141ebac10 MultiRHS working, starting to optimise. Block doesn't and I thought it already was; puzzled. 2017-04-17 10:50:19 +01:00
7ede696126 Non compile of tests fixed 2017-04-16 23:40:00 +01:00
bf516c3b81 higher precision reduction variables in norm and inner product 2017-04-15 12:27:28 +01:00
441a52ee5d First cut at higher precision reduction 2017-04-15 10:57:21 +01:00
a8db024c92 Cleaning up the dense matrix and lanczos sector 2017-04-15 08:54:11 +01:00
a9c22d5f43 Verbose removal 2017-04-14 14:38:49 +01:00
3ca41458a3 Fix to no USE_FP16 case 2017-04-14 14:20:54 +01:00
9e2d29c644 USE_FP16 macro 2017-04-14 14:17:14 +01:00
951be75292 Half precision conversion working on AVX512 now too 2017-04-13 17:35:11 +01:00
b9113ed310 Patches for knl 2017-04-13 12:02:12 -04:00
42fb49d3fd Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-04-13 14:12:47 +01:00
2a54c9aaab Merge branch 'feature/block-cg' into develop 2017-04-13 14:12:24 +01:00
0957378679 Fixing conditional ugly way 2017-04-13 13:47:56 +01:00
2ed6c76fc5 Getting multiline if then fi working 2017-04-13 13:43:13 +01:00
d3b9a7fa14 F16c apparently requires AVX, even if the 128 bit are used.
Seems odd.
2017-04-13 13:19:11 +01:00
75ea306ce9 Another try at travis 2017-04-13 13:05:32 +01:00
4226c633c4 Default to FP16 off again 2017-04-13 12:51:39 +01:00
5a4eafbf7e .travis 2017-04-13 12:50:43 +01:00
eb8e26018b Travis update for macos 2017-04-13 12:35:11 +01:00
db5ea001a3 Update to use Xcode 8.3 since -mfp16 causes SIGILL 2017-04-13 12:22:40 +01:00
2846f079e5 Predicate tests on fp16 being enabled 2017-04-13 12:08:05 +01:00
1d502e4ed6 FP16 optional compile time 2017-04-13 11:55:24 +01:00
73cdf0fffe Drop f16c from SSE because of a macos compile error on travis 2017-04-13 11:23:41 +01:00
1c25773319 Trap illegal instructions 2017-04-13 10:51:40 +01:00
c38400b26f Trap signals 2017-04-13 10:35:20 +01:00
9c3065b860 Debug flags off again 2017-04-13 10:01:32 +01:00
94eb829d08 Align cast fixed for __mm128i gcc complained 2017-04-13 08:40:44 +01:00
68392ddb5b Exchange in generic
Precision change in AVX, SSE, AVX512, Generic. QPX still to do.
2017-04-13 08:38:12 +01:00
cb6b81ae82 Half precision conversion 2017-04-12 19:32:37 +01:00
90ec6eda0c Rare K test solver name fix 2017-04-10 17:48:58 +01:00
fe8d625694 Merge commit '5e477ec553aa48d7d19b5a7c45d41acbb3392bcb' into feature/rare_kaon 2017-04-10 17:23:37 +01:00
53e76b41d2 Merge branch 'develop' into feature/hadrons 2017-04-10 17:00:53 +01:00
8ef4300412 spurious .dirstamp files removed 2017-04-10 17:00:22 +01:00
98a24ebf31 The macro “magics” is very intensive for the preprocessor in the measurement code which has numerous serialisable classes. Reducing the number of serialisable fields to 64 (instead of 1024) helps a lot, this is enough for now and can be extended trivially if needed in the future. 2017-04-10 16:58:54 +01:00
b12dc89d26 Commenting and clean up 2017-04-10 20:38:20 +09:00
d80d802f9d MultiRHS solver test 2017-04-10 00:12:12 +09:00
3d99b09dba Start of blockCG 2017-04-09 23:42:10 +09:00
db5f6d3ae3 Verbose fix 2017-04-09 23:41:30 +09:00
683550f116 Const args improvement 2017-04-09 23:41:04 +09:00
5e477ec553 Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/rare_kaon 2017-04-07 11:51:09 +01:00
55d0329624 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-04-07 11:08:14 +09:00
86aaa35294 Christoph needs SchurDiagTwoKappa which is mobius specific. 2017-04-07 11:07:40 +09:00
172d3dc93a Correcting names in tests 2017-04-05 16:24:04 +01:00
8c540333d5 Merge branch 'develop' into feature/hmc_generalise 2017-04-05 14:41:04 +01:00
5592f7b8c1 Creation mode better implementation 2017-04-05 02:35:34 +09:00
35da4ece0b UID fix 2017-04-05 02:18:15 +09:00
061b15b9e9 Merge branch 'feature/sitmo-skipahead' into develop 2017-04-05 01:24:49 +09:00
ff4e54ef80 Merge branch 'develop' into feature/hadrons 2017-04-03 18:56:21 +01:00
cd1bd921bd Reduced code duplication for Weak Hamiltonian contraction modules 2017-03-30 18:02:14 +01:00
fff5751b1a HADRONS: Updated rare kaon test program, including all contractions. Sink smearing still to be implemented. 2017-03-30 10:57:01 +01:00
2c81696fdd HADRONS: 4pt Weak + current disconnected topology (e.g. for rare neutral kaon decays) 2017-03-30 10:37:17 +01:00
c9dc22efa1 HADRONS: Standalone disconnected loop contraction. 2017-03-30 10:33:18 +01:00
0ab04a000f HADRONS: 3pt contraction with gamma insertion between two propagators. 2017-03-30 10:30:58 +01:00
4c1ea8677e Small cosmetic changes and vscode gitignore 2017-03-23 14:09:35 +09:00
120fb59978 Adding tests for WilsonFlow classes 2017-03-21 16:11:35 +09:00
fd56b3ff38 Merge branch 'develop' into feature/hmc_generalise 2017-03-21 13:33:41 +09:00
0ec6829edc Fixing compilation errors for the WilsonFlow 2017-03-21 13:06:32 +09:00
18b7845b7b Adding WilsonFlow smearing 2017-03-21 11:52:05 +09:00
3d0fe15374 Added topological charge measurement 2017-03-17 16:14:57 +09:00
91886068fe Fixed seg fault for observable modules 2017-03-17 13:59:31 +09:00
6d1e9e5f92 Small cleanup of the observables 2017-03-17 11:42:55 +09:00
b640230b1e Moving hmc observables in a different directory 2017-03-17 11:40:17 +09:00
b3dede4dd3 Merge branch 'develop' into feature/hmc_generalise 2017-03-10 23:57:37 +09:00
4e34132f4d Correcting modules use in test files 2017-03-10 23:54:53 +09:00
c07cb10247 Merge branch 'feature/hmc_generalise' of https://github.com/paboyle/Grid into feature/hmc_generalise 2017-03-10 22:37:25 +09:00
d7767a2a62 Few more tests 2017-03-10 22:33:48 +09:00
ec035983fd Fixing the implicit integration 2017-03-01 11:56:35 +00:00
596dcd85b2 Auxiliary fields 2017-02-27 13:16:38 +00:00
7270c6a150 Integrator works now 2017-02-24 17:03:42 +00:00
f8b9ad7d50 Merge pull request #91 from sunpho84/public_modules_memebers
making public same serializable parameters in HMC Module
2017-02-22 00:53:20 +00:00
04a1959895 Merge pull request #90 from sunpho84/liming
adding --with switch to pass lime path
2017-02-22 00:52:53 +00:00
93cc270016 making public same serializable parameters in HMC Module
RNGModuleParameters
GridModuleParameters
2017-02-21 23:11:56 +01:00
29b60f7e1a adding --with switch to pass lime path 2017-02-21 23:09:39 +01:00
902afcfbaf Adding metric and the implicit steps 2017-02-21 11:30:57 +00:00
97a6b61551 Covariant laplacian and implicit integration 2017-02-20 11:17:27 +00:00
f011bdb869 Fixed overwrite of pminus projection in construction of 4d propagator from 5d. 2017-02-14 14:07:17 +00:00
bafb101e4f Testing different versions of the Laplacian 2017-02-13 15:38:11 +00:00
08fdf05528 Added and tested the covariant laplacian + CG solver 2017-02-13 15:05:01 +00:00
9e72a6b22e Reverting to Xcode 7.3 2017-02-10 12:57:03 +00:00
1c12c5612c Xcode 8.2 for travis 2017-02-10 12:12:01 +00:00
a8193c4bcb Correcting travis compilation on gcc 2017-02-10 10:59:30 +00:00
c3d7ec65fa All tests compile. 2017-02-10 10:27:51 +00:00
8b6a6c8236 Resolving small merge conflict 2017-02-09 16:20:24 +00:00
e0571c872b Merge branch 'develop' into feature/hmc_generalise 2017-02-09 16:12:00 +00:00
c67f41887b Reverting parameters to original 2017-02-09 15:59:56 +00:00
84687ccf1f Handling an Intel compiler warning for Json class 2017-02-09 15:33:33 +00:00
3274561cf8 Cleanup 2017-02-09 15:18:38 +00:00
e08fbb3771 Merge pull request #84 from Lanny91/feature/rare_kaon
Rare Kaon decay contraction code
2017-02-08 08:23:34 -08:00
d7464aa0fe Switched from XmlWriter to CorrWriter in contraction code 2017-02-08 16:13:44 +00:00
00d29153f0 Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/rare_kaon 2017-02-08 16:11:15 +00:00
2ce989f220 Hadrons: default I/O to HDF5 2017-02-08 07:50:05 -08:00
d7a1dc85be Revert "Hadrons: test for rare kaon contraction code."
This reverts commit 1e257a1251.
2017-02-08 13:23:05 +00:00
fc19503673 Removed MSink namespace. 2017-02-08 13:17:39 +00:00
beba824136 Make use of GammaL class in Weak Hamiltonian contractions 2017-02-08 12:45:39 +00:00
6ebf8b12b6 Removed unnecessary repeat of write in Weak Hamiltonian contractions 2017-02-08 12:43:13 +00:00
e5a7ed4362 Moved write outside of loop, some physics corrections 2017-02-08 12:29:33 +00:00
b9f7ea47c3 Access hasModule function directly from Environment instance. 2017-02-08 10:10:06 +00:00
06f7ee202e Revert "Add function to say whether or not a module exists in application class"
This reverts commit 522f6bf91a.
2017-02-08 10:08:18 +00:00
2b2fc6453f Fixed single precision compatibility issues 2017-02-07 13:59:29 +00:00
bdd2765461 Added missing allocation of Weak Hamiltonian result vector 2017-02-07 13:06:42 +00:00
4a45c06dd7 Code cleaning and addition of Weak Hamiltonian contraction log message 2017-02-06 20:12:30 +00:00
d6a7d7d1e0 Hadrons: added missing momentum parameter in rare kaon contraction test 2017-02-06 18:15:49 +00:00
1a122a0dd8 Hadrons: corrected gamma matrix inputs in rare kaon test 2017-02-06 17:35:41 +00:00
20e20733e8 Merge branch 'feature/hadrons' into feature/rare_kaon 2017-02-06 14:12:21 +00:00
b7cd1a19e3 Utilities for reading and writing "pair" objects. 2017-02-06 14:08:59 +00:00
f510002a62 Merge remote-tracking branch 'paboyle/feature/hadrons' into feature/hadrons 2017-02-03 14:37:34 +00:00
1e257a1251 Hadrons: test for rare kaon contraction code. 2017-02-01 16:36:40 +00:00
522f6bf91a Add function to say whether or not a module exists in application class 2017-02-01 16:36:08 +00:00
d35d87d2c2 Weak Hamiltonian Eye-type contraction execution 2017-02-01 16:33:24 +00:00
74a5cda84b Removed unnecessary "3pt" labels 2017-02-01 15:03:49 +00:00
5be05d85b8 Fixed collision of Wall source and sink header ifdefs 2017-02-01 13:56:22 +00:00
35ac85aea8 Updated Weak Hamiltonian contractions to use zero-flop gamma matrices 2017-02-01 12:57:34 +00:00
fa237401ff Consistent variable name in macro 2017-02-01 12:56:55 +00:00
97053adcb5 Merge branch 'feature/hadrons' into feature/rare_kaon 2017-02-01 10:13:29 +00:00
f8fbe4d7a3 Merge remote-tracking branch 'paboyle/feature/hadrons' into feature/hadrons
# Conflicts:
#	extras/Hadrons/Modules/MContraction/Meson.hpp
#	tests/hadrons/Test_hadrons_meson_3pt.cc

Updated Meson.hpp to utilise zero-flop gamma matrices.
2017-02-01 09:27:00 +00:00
ef31c012bf Merge remote-tracking branch 'paboyle/develop' into feature/hadrons 2017-01-31 17:36:10 +00:00
9e9f621d5d Hadrons: added Weak Hamiltonian module dependencies, some reformatting. 2017-01-30 17:54:21 +00:00
651e1a7cbc Hadrons: Momentum inserted as multiples of 2*pi/L 2017-01-30 17:14:33 +00:00
c4d3672720 Hadrons: Momentum projection in meson module. 2017-01-30 17:09:04 +00:00
16be6d378c Now action factory support different Fields (templated) 2017-01-30 14:22:41 +00:00
f05d0565aa Adding ScalarField theory 2017-01-30 10:59:28 +00:00
9bf4108d1f Weak Hamiltonian contraction modules, for Eye and NonEye contraction topologies. Execution for NonEye type diagrams has been implemented, but not yet for Eye type. 2017-01-27 16:58:11 +00:00
6929a84c70 Reformatting files 2017-01-27 11:54:44 +00:00
5c779a789b Moving registrations in an independent file 2017-01-27 11:23:51 +00:00
e863a948e3 Cleaning up files and directories 2017-01-26 15:24:49 +00:00
977f34dca6 Added missing typename 2017-01-26 13:18:33 +00:00
90ad956340 Merge branch 'develop' of https://github.com/paboyle/Grid into feature/rare_kaon 2017-01-26 12:08:41 +00:00
7996f06335 Commented out registrations.
Move to an independent file that is linked only for the factory managed HMC
2017-01-25 18:27:45 +00:00
7b40a3e3e5 Reorganizing files 2017-01-25 18:09:46 +00:00
f7fbbaaca3 Compiles after merging 2017-01-25 12:11:58 +00:00
17629b8d9e Merge branch 'develop' into feature/hmc_generalise 2017-01-25 11:33:53 +00:00
0baa20d292 Againg fixing compilation on Travis, no LIME lib present 2017-01-25 11:18:44 +00:00
4571c918a4 Fixing compilation error when compiling without LIME 2017-01-25 11:14:43 +00:00
5251ea4d30 Adding more fermion action modules, generalised DWF 2017-01-25 11:10:44 +00:00
7f456b4173 👷 Added all pseudofermion actions to the serialiser 2017-01-24 13:57:32 +00:00
c291ef77b5 Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/hadrons 2017-01-23 15:24:47 +00:00
7dd2764bb2 Wall sink smearing 2017-01-23 15:17:54 +00:00
244f8fb6dc Added JSON parser (without NextElement) 2017-01-23 14:57:38 +00:00
27dfe816fa Added TwoFlavorsEO
Had to remove a conformability check in the Derivative of SchurDiff,
see the comments in the file
2017-01-20 16:59:31 +00:00
af29be2c90 Simplified operation of meson module. Result has been modified to output one contraction at a time for each pair of gamma insertions at source and sink. 2017-01-20 16:38:50 +00:00
f96fac0aee All functionalities ready.
Todo: add all the fermion action modules
2017-01-20 12:56:20 +00:00
07f2ebea1b Meson module now takes list of gamma matrices to insert at source and sink. 2017-01-19 22:18:42 +00:00
851f2ad8ef Adding fermions actions support in the factories 2017-01-19 10:00:02 +00:00
23e0561dd6 Added all required functionalities, time for cleaning
All actions to be added
2017-01-18 16:31:51 +00:00
8ae1a95ec6 Legal banners and module descriptions 2017-01-17 18:14:20 +00:00
82b7d4eaf0 Added noise loop dependencies 2017-01-17 15:58:32 +00:00
78774fbdc0 Construct loop propagator 2017-01-17 15:29:45 +00:00
924130833e Moved more parameters to serialization 2017-01-17 13:22:18 +00:00
0157274762 HMC factories 2017-01-17 10:46:49 +00:00
87e8aad5a0 Added support for input file HMC modules (missing the actions yet) 2017-01-16 16:07:12 +00:00
c6f59c2933 Adding factories 2017-01-16 10:18:09 +00:00
b7f90aa011 Added momentum choice for wall source 2017-01-13 15:54:19 +00:00
f22b79da8f Added missing type aliases 2017-01-12 12:52:12 +00:00
3855673ebf Added header for wall source 2017-01-12 11:42:37 +00:00
4db82da0db Wall sources 2017-01-12 11:41:10 +00:00
0cdc3d2fa5 Merge remote-tracking branch 'refs/remotes/paboyle/feature/hadrons' into feature/hadrons 2017-01-12 11:26:55 +00:00
0dfda4bb90 Working on the RNGModule 2017-01-09 11:06:18 +00:00
1189ebc8b5 Cleaning up the checkpointers interface 2017-01-05 15:52:52 +00:00
1bb8578173 Added module for checkpointers 2017-01-05 13:09:32 +00:00
5214846341 Adding a resource manager 2016-12-22 12:41:56 +00:00
ce1a115e0b Removing redundant arguments for integrator functions, step 1 2016-12-20 17:51:30 +00:00
0bd296dda4 Adding check of the Dag part in the benchmark 2016-12-14 03:15:09 +00:00
af0ccdd8e9 Moving output order 2016-12-14 02:02:42 +00:00
2fb92dbc6e Cleaning up previous debug lines 2016-12-13 07:53:43 +00:00
5c74b6028b Commit for debugging, lot of IO 2016-12-13 06:35:30 +00:00
e0be2b6e6c Adding a new tests for the Ls vec CG 2016-12-13 04:59:18 +00:00
ef72f322d2 consistency of tests 2016-12-13 02:24:20 +00:00
7bc2065113 Adding report at the end of the DWF HMC tests 2016-12-12 04:21:34 +00:00
2bd4233919 Completed testing of the HMC for Ls vectorised version (on AVX2) 2016-12-07 04:56:37 +00:00
143c70e29f Debugged the threaded version. Cleaning up 2016-12-07 04:40:25 +00:00
b812d5e39c Added single threaded version of the derivative for the Ls vectorised DWF 2016-12-06 16:31:13 +00:00
01480da0a8 Merge branch 'develop' into feature/hmc_generalise 2016-12-05 05:10:27 +00:00
62749d05a6 Naming the scalar action 2016-11-17 12:26:20 +00:00
3834feb4b7 Adding action names 2016-11-16 16:46:49 +00:00
454302414d Small modif at the test hmc 2016-11-15 12:31:13 +00:00
6f8b771a37 Adding date of the last commit 2016-11-10 18:52:00 +00:00
4e1ffdd17c Adding git info to the configure output 2016-11-10 18:44:36 +00:00
a783282b8b Merge branch 'develop' into feature/hmc_generalise 2016-11-10 18:13:07 +00:00
19b85d8486 Some comments in the hmc files 2016-11-10 17:55:58 +00:00
c363bdd784 Merge branch 'release/v0.6.0' 2016-11-09 12:43:14 +00:00
1d666771f9 Debugging the RNG, eliminate the barrier after broadcast 2016-10-26 16:08:23 +01:00
d50055cd96 Making the ILDG support optional 2016-10-26 09:48:01 +01:00
47c7159177 ILDG reader/writer works
Fill the xml header with the required information, todo.
2016-10-24 21:57:54 +01:00
f415db583a Adding ILDG format 2016-10-24 15:48:22 +01:00
f55c16f984 Adding a barrier in the RNG save 2016-10-24 11:02:14 +01:00
df67e013ca More debug output for the RNG 2016-10-22 13:34:17 +01:00
3e990c9d0a Reverting the broadcast change 2016-10-22 13:26:43 +01:00
4b740fc8fd Debugging the RNG state save 2016-10-22 13:06:00 +01:00
cccd14b09e Small cleanup 2016-10-21 17:20:54 +01:00
e6acffdfc2 Fixing the plaquette computation 2016-10-21 16:06:34 +01:00
392130a537 Working on the 5d 2016-10-21 14:22:25 +01:00
deef2673b2 Separating the Lattice theories stub from the QCD.h file 2016-10-20 17:24:08 +01:00
977b0a6dd9 Merge branch 'develop' into feature/hmc_generalise 2016-10-20 17:04:41 +01:00
977d844394 Few modifications on stdout messages 2016-10-20 17:01:59 +01:00
590675e2ca Csum in hex format 2016-10-19 17:26:25 +01:00
8c65bdf6d3 Printing checksum for the RNG file 2016-10-19 16:56:11 +01:00
74f1ed3bc5 Adding some documentation for HMC 2016-10-19 10:51:13 +01:00
79270ef510 Added a test for EODWF Scaled Shamir with general HMC 2016-10-14 17:34:26 +01:00
e250e6b7bb Moving parameters outside of the HMCrunner 2016-10-14 17:22:32 +01:00
261342c15f Adding gh-pages 2016-10-13 11:51:25 +01:00
eda4dd622e Some more edit 2016-10-11 15:45:20 +01:00
c68a2b9637 Minor fix 2016-10-10 11:54:58 +01:00
293df6cd20 Generalising the HMCRunner and moving parameters to the user level 2016-10-10 11:49:55 +01:00
65f61bb3bf Reset QCD colours to 3 2016-10-10 09:46:17 +01:00
26b9740d53 Some fix for the GenericHMCrunner 2016-10-10 09:43:05 +01:00
6eb873dd96 Added scalar action phi^4
Check Norm2 output (Complex type assumption)
2016-10-07 17:28:46 +01:00
11b4c80b27 Added support for hmc and binary IO for a general field 2016-10-07 13:37:29 +01:00
c065e454c3 Adding Binrary IO, untested 2016-10-06 10:12:11 +01:00
d9b5fbd374 In the middle of adding a general binary writer 2016-10-04 11:24:08 +01:00
cfbc1a26b8 Now the gauge implementation has to take care of the Nexp 2016-10-03 16:20:06 +01:00
257f69f931 One more function to generalise the HMC integrator 2016-10-03 15:50:04 +01:00
e415260961 First cut on generalised HMC
Backward compatibility OK
2016-10-03 15:28:00 +01:00
446c768cd3 Merge branch 'hotfix/v0.5.1'
Double precision compile fix
2016-07-01 16:33:59 +01:00
254 changed files with 30553 additions and 6788 deletions

8
.gitignore vendored
View File

@ -92,6 +92,7 @@ build*/*
#####################
*.xcodeproj/*
build.sh
.vscode
# Eigen source #
################
@ -106,6 +107,10 @@ lib/fftw/*
m4/lt*
m4/libtool.m4
# github pages #
################
gh-pages/
# Buck files #
##############
.buck*
@ -116,4 +121,5 @@ make-bin-BUCK.sh
# generated sources #
#####################
lib/qcd/spin/gamma-gen/*.h
lib/qcd/spin/gamma-gen/*.cc
lib/qcd/spin/gamma-gen/*.cc

View File

@ -7,9 +7,11 @@ cache:
matrix:
include:
- os: osx
osx_image: xcode7.2
osx_image: xcode8.3
compiler: clang
- compiler: gcc
dist: trusty
sudo: required
addons:
apt:
sources:
@ -24,6 +26,8 @@ matrix:
- binutils-dev
env: VERSION=-4.9
- compiler: gcc
dist: trusty
sudo: required
addons:
apt:
sources:
@ -38,6 +42,7 @@ matrix:
- binutils-dev
env: VERSION=-5
- compiler: clang
dist: trusty
addons:
apt:
sources:
@ -52,6 +57,7 @@ matrix:
- binutils-dev
env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
- compiler: clang
dist: trusty
addons:
apt:
sources:
@ -73,13 +79,15 @@ before_install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
install:
- export CC=$CC$VERSION
- export CXX=$CXX$VERSION
- echo $PATH
- which autoconf
- autoconf --version
- which automake
- automake --version
- which $CC
- $CC --version
- which $CXX
@ -92,15 +100,15 @@ script:
- cd build
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
- make -j4
- ./benchmarks/Benchmark_dwf --threads 1
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
- echo make clean
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
- make -j4
- ./benchmarks/Benchmark_dwf --threads 1
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
- make check
- echo make clean
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto
- make -j4
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto ; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then make -j4; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi

View File

@ -3,10 +3,15 @@ SUBDIRS = lib benchmarks tests extras
include $(top_srcdir)/doxygen.inc
tests: all
$(MAKE) -C tests tests
bin_SCRIPTS=grid-config
.PHONY: tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL)
.PHONY: bench check tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL)
tests-local: all
bench-local: all
check-local: all
AM_CXXFLAGS += -I$(top_builddir)/include
ACLOCAL_AMFLAGS = -I m4

View File

@ -22,6 +22,26 @@ Last update Nov 2016.
_Please do not send pull requests to the `master` branch which is reserved for releases._
### Compilers
Intel ICPC v16.0.3 and later
Clang v3.5 and later (need 3.8 and later for OpenMP)
GCC v4.9.x (recommended)
GCC v6.3 and later
### Important:
Some versions of GCC appear to have a bug under high optimisation (-O2, -O3).
The safety of these compiler versions cannot be guaranteed at this time. Follow Issue 100 for details and updates.
GCC v5.x
GCC v6.1, v6.2
### Bug report
_To help us tracking and solving more efficiently issues with Grid, please report problems using the issue system of GitHub rather than sending emails to Grid developers._
@ -32,7 +52,7 @@ When you file an issue, please go though the following checklist:
2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output the `--version` option of your compiler.
3. Give the exact `configure` command used.
4. Attach `config.log`.
5. Attach `config.summary`.
5. Attach `grid.config.summary`.
6. Attach the output of `make V=1`.
7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example.
@ -95,10 +115,10 @@ install Grid. Other options are detailed in the next section, you can also use `
`CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to
customise the build.
Finally, you can build and install Grid:
Finally, you can build, check, and install Grid:
``` bash
make; make install
make; make check; make install
```
To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute:
@ -121,7 +141,7 @@ If you want to build all the tests at once just use `make tests`.
- `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes).
- `--enable-precision={single|double}`: set the default precision (default: `double`).
- `--enable-precision=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below.
- `--enable-rng={ranlux48|mt19937}`: choose the RNG (default: `ranlux48 `).
- `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `).
- `--disable-timers`: disable system dependent high-resolution timers.
- `--enable-chroma`: enable Chroma regression tests.
- `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`)
@ -159,7 +179,6 @@ Alternatively, some CPU codenames can be directly used:
| `<code>` | Description |
| ----------- | -------------------------------------- |
| `KNC` | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) |
| `KNL` | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) |
| `BGQ` | Blue Gene/Q |

61
TODO
View File

@ -1,6 +1,26 @@
TODO:
---------------
Peter's work list:
2)- Precision conversion and sort out localConvert <--
3)- Remove DenseVector, DenseMatrix; Use Eigen instead. <-- started
4)- Binary I/O speed up & x-strips
-- Profile CG, BlockCG, etc... Flop count/rate -- PARTIAL, time but no flop/s yet
-- Physical propagator interface
-- Conserved currents
-- GaugeFix into central location
-- Multigrid Wilson and DWF, compare to other Multigrid implementations
-- HDCR resume
Recent DONE
-- Cut down the exterior overhead <-- DONE
-- Interior legs from SHM comms <-- DONE
-- Half-precision comms <-- DONE
-- Merge high precision reduction into develop
-- multiRHS DWF; benchmark on Cori/BNL for comms elimination
-- slice* linalg routines for multiRHS, BlockCG
-----
* Forces; the UdSdU term in gauge force term is half of what I think it should
be. This is a consequence of taking ONLY the first term in:
@ -21,16 +41,8 @@ TODO:
This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two.
This 2x is applied by hand in the fermion routines and in the Test_rect_force routine.
Policies:
* Link smearing/boundary conds; Policy class based implementation ; framework more in place
* Support different boundary conditions (finite temp, chem. potential ... )
* Support different fermion representations?
- contained entirely within the integrator presently
- Sign of force term.
- Reversibility test.
@ -41,11 +53,6 @@ Policies:
- Audit oIndex usage for cb behaviour
- Rectangle gauge actions.
Iwasaki,
Symanzik,
... etc...
- Prepare multigrid for HMC. - Alternate setup schemes.
- Support for ILDG --- ugly, not done
@ -55,9 +62,11 @@ Policies:
- FFTnD ?
- Gparity; hand opt use template specialisation elegance to enable the optimised paths ?
- Gparity force term; Gparity (R)HMC.
- Random number state save restore
- Mobius implementation clean up to rmove #if 0 stale code sequences
- CG -- profile carefully, kernel fusion, whole CG performance measurements.
================================================================
@ -90,6 +99,7 @@ Insert/Extract
Not sure of status of this -- reverify. Things are working nicely now though.
* Make the Tensor types and Complex etc... play more nicely.
- TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I
want to introduce a syntax that does not require this.
@ -112,6 +122,8 @@ Not sure of status of this -- reverify. Things are working nicely now though.
RECENT
---------------
- Support different fermion representations? -- DONE
- contained entirely within the integrator presently
- Clean up HMC -- DONE
- LorentzScalar<GaugeField> gets Gauge link type (cleaner). -- DONE
- Simplified the integrators a bit. -- DONE
@ -123,6 +135,26 @@ RECENT
- Parallel io improvements -- DONE
- Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE
DONE:
- MultiArray -- MultiRHS done
- ConjugateGradientMultiShift -- DONE
- MCR -- DONE
- Remez -- Mike or Boost? -- DONE
- Proto (ET) -- DONE
- uBlas -- DONE ; Eigen
- Potentially Useful Boost libraries -- DONE ; Eigen
- Aligned allocator; memory pool -- DONE
- Multiprecision -- DONE
- Serialization -- DONE
- Regex -- Not needed
- Tokenize -- Why?
- Random number state save restore -- DONE
- Rectangle gauge actions. -- DONE
Iwasaki,
Symanzik,
... etc...
Done: Cayley, Partial , ContFrac force terms.
DONE
@ -207,6 +239,7 @@ Done
FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane)
======================================================================================================
* Link smearing/boundary conds; Policy class based implementation ; framework more in place -- DONE
* Command line args for geometry, simd, etc. layout. Is it necessary to have -- DONE
user pass these? Is this a QCD specific?

View File

@ -1,6 +1,5 @@
Version : 0.6.0
Version : 0.7.0
- AVX512, AVX2, AVX, SSE good
- Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above
- MPI and MPI3
- HiRep, Smearing, Generic gauge group
- Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended
- MPI and MPI3 comms optimisations for KNL and OPA finished
- Half precision comms

View File

@ -1,28 +1,22 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_dwf.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
@ -151,9 +145,7 @@ int main (int argc, char ** argv)
RealD M5 =1.8;
RealD NP = UGrid->_Nprocessors;
std::cout << GridLogMessage << "Creating action operator " << std::endl;
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
RealD NN = UGrid->NodeCount();
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
@ -163,16 +155,22 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
int ncall =1000;
if (1) {
FGrid->Barrier();
Dw.ZeroCounters();
Dw.Dhop(src,result,0);
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
@ -190,6 +188,7 @@ int main (int argc, char ** argv)
// std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
@ -206,6 +205,34 @@ int main (int argc, char ** argv)
Dw.Report();
}
DomainWallFermionRL DwH(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
if (1) {
FGrid->Barrier();
DwH.ZeroCounters();
DwH.Dhop(src,result,0);
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
DwH.Dhop(src,result,0);
__SSC_STOP;
}
double t1=usecond();
FGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
assert (norm2(err)< 1.0e-3 );
DwH.Report();
}
if (1)
{
@ -214,6 +241,10 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
@ -245,6 +276,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
// std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
sDw.Report();
RealD sum=0;
@ -277,6 +309,10 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric )
std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)
@ -316,6 +352,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "sDeo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "sDeo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
sDw.Report();
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
@ -394,14 +431,15 @@ int main (int argc, char ** argv)
// S-direction is INNERMOST and takes no part in the parity.
static int Opt; // these are a temporary hack
static int Comms; // these are a temporary hack
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO "<<std::endl;
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
@ -422,6 +460,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
Dw.Report();
}
Dw.DhopEO(src_o,r_e,DaggerNo);
@ -453,3 +492,4 @@ int main (int argc, char ** argv)
Grid_finalize();
}

View File

@ -35,8 +35,9 @@ using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
#define LMAX (32)
int Nloop=1000;
int Nloop=200;
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
@ -50,7 +51,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=2;lat<=32;lat+=2){
for(int lat=2;lat<=LMAX;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@ -82,7 +83,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=2;lat<=32;lat+=2){
for(int lat=2;lat<=LMAX;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@ -113,7 +114,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=2;lat<=32;lat+=2){
for(int lat=2;lat<=LMAX;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@ -144,7 +145,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=2;lat<=32;lat+=2){
for(int lat=2;lat<=LMAX;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];

View File

@ -1,11 +1,7 @@
include Make.inc
simple: simple_su3_test.o simple_su3_expr.o simple_simd_test.o
EXTRA_LIBRARIES = libsimple_su3_test.a libsimple_su3_expr.a libsimple_simd_test.a
libsimple_su3_test_a_SOURCES = simple_su3_test.cc
libsimple_su3_expr_a_SOURCES = simple_su3_expr.cc
libsimple_simd_test_a_SOURCES = simple_simd_test.cc
bench-local: all
./Benchmark_su3
./Benchmark_memory_bandwidth
./Benchmark_wilson
./Benchmark_dwf --dslash-unroll

View File

@ -1,6 +1,6 @@
#!/usr/bin/env bash
]#!/usr/bin/env bash
EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2'
EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2'
echo "-- deploying Eigen source..."
wget ${EIGEN_URL} --no-check-certificate

View File

@ -1,16 +1,19 @@
AC_PREREQ([2.63])
AC_INIT([Grid], [0.6.0], [https://github.com/paboyle/Grid], [Grid])
AC_INIT([Grid], [0.7.0], [https://github.com/paboyle/Grid], [Grid])
AC_CANONICAL_BUILD
AC_CANONICAL_HOST
AC_CANONICAL_TARGET
AM_INIT_AUTOMAKE(subdir-objects)
AM_INIT_AUTOMAKE([subdir-objects 1.13])
AM_EXTRA_RECURSIVE_TARGETS([tests bench])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_SRCDIR([lib/Grid.h])
AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
################ Get git info
#AC_REVISION([m4_esyscmd_s([./scripts/configure.commit])])
############### Checks for programs
CXXFLAGS="-O3 $CXXFLAGS"
AC_PROG_CXX
AC_PROG_RANLIB
@ -24,12 +27,15 @@ AX_GXX_VERSION
AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
[version of g++ that will compile the code])
CXXFLAGS="-O3 $CXXFLAGS"
############### Checks for typedefs, structures, and compiler characteristics
AC_TYPE_SIZE_T
AC_TYPE_UINT32_T
AC_TYPE_UINT64_T
############### OpenMP
############### OpenMP
AC_OPENMP
ac_openmp=no
if test "${OPENMP_CXXFLAGS}X" != "X"; then
@ -60,16 +66,23 @@ AC_ARG_WITH([mpfr],
[AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
############### FFTW3
AC_ARG_WITH([fftw],
############### FFTW3
AC_ARG_WITH([fftw],
[AS_HELP_STRING([--with-fftw=prefix],
[try this for a non-standard install prefix of the FFTW3 library])],
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
############### lapack
############### LIME
AC_ARG_WITH([lime],
[AS_HELP_STRING([--with-lime=prefix],
[try this for a non-standard install prefix of the LIME library])],
[AM_CXXFLAGS="-I$with_lime/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_lime/lib $AM_LDFLAGS"])
############### lapack
AC_ARG_ENABLE([lapack],
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
[ac_LAPACK=${enable_lapack}], [ac_LAPACK=no])
case ${ac_LAPACK} in
@ -83,6 +96,18 @@ case ${ac_LAPACK} in
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
esac
############### FP16 conversions
AC_ARG_ENABLE([sfw-fp16],
[AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])],
[ac_SFW_FP16=${enable_sfw_fp16}], [ac_SFW_FP16=yes])
case ${ac_SFW_FP16} in
yes)
AC_DEFINE([SFW_FP16],[1],[software conversion to fp16]);;
no);;
*)
AC_MSG_ERROR(["SFW FP16 option not supported ${ac_SFW_FP16}"]);;
esac
############### MKL
AC_ARG_ENABLE([mkl],
[AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])],
@ -108,7 +133,7 @@ AC_ARG_WITH([hdf5],
############### first-touch
AC_ARG_ENABLE([numa],
[AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
[AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
[ac_NUMA=${enable_NUMA}],[ac_NUMA=no])
case ${ac_NUMA} in
@ -134,8 +159,8 @@ if test "${ac_MKL}x" != "nox"; then
fi
AC_SEARCH_LIBS([__gmpf_init], [gmp],
[AC_SEARCH_LIBS([mpfr_init], [mpfr],
[AC_DEFINE([HAVE_LIBMPFR], [1],
[AC_SEARCH_LIBS([mpfr_init], [mpfr],
[AC_DEFINE([HAVE_LIBMPFR], [1],
[Define to 1 if you have the `MPFR' library])]
[have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])]
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])]
@ -144,7 +169,7 @@ AC_SEARCH_LIBS([__gmpf_init], [gmp],
if test "${ac_LAPACK}x" != "nox"; then
AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [],
[AC_MSG_ERROR("LAPACK enabled but library not found")])
fi
fi
AC_SEARCH_LIBS([fftw_execute], [fftw3],
[AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [],
@ -152,6 +177,14 @@ AC_SEARCH_LIBS([fftw_execute], [fftw3],
[AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])]
[have_fftw=true])
AC_SEARCH_LIBS([limeCreateReader], [lime],
[AC_DEFINE([HAVE_LIME], [1], [Define to 1 if you have the `LIME' library])]
[have_lime=true],
[AC_MSG_WARN(C-LIME library was not found in your system.
In order to use ILGG file format please install or provide the correct path to your installation
Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)])
AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp],
[AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])]
[have_hdf5=true]
@ -176,19 +209,26 @@ case ${ax_cv_cxx_compiler_vendor} in
case ${ac_SIMD} in
SSE4)
AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
SIMD_FLAGS='-msse4.2';;
case ${ac_SFW_FP16} in
yes)
SIMD_FLAGS='-msse4.2';;
no)
SIMD_FLAGS='-msse4.2 -mf16c';;
*)
AC_MSG_ERROR(["SFW_FP16 must be either yes or no value ${ac_SFW_FP16} "]);;
esac;;
AVX)
AC_DEFINE([AVX1],[1],[AVX intrinsics])
SIMD_FLAGS='-mavx';;
SIMD_FLAGS='-mavx -mf16c';;
AVXFMA4)
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
SIMD_FLAGS='-mavx -mfma4';;
SIMD_FLAGS='-mavx -mfma4 -mf16c';;
AVXFMA)
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3])
SIMD_FLAGS='-mavx -mfma';;
SIMD_FLAGS='-mavx -mfma -mf16c';;
AVX2)
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
SIMD_FLAGS='-mavx2 -mfma';;
SIMD_FLAGS='-mavx2 -mfma -mf16c';;
AVX512)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
@ -297,7 +337,7 @@ case ${ac_COMMS} in
comms_type='shmem'
;;
*)
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
;;
esac
case ${ac_COMMS} in
@ -334,7 +374,7 @@ case ${ac_RNG} in
AC_DEFINE([RNG_SITMO],[1],[RNG_SITMO] )
;;
*)
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
;;
esac
@ -351,7 +391,7 @@ case ${ac_TIMERS} in
AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
;;
*)
AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
;;
esac
@ -363,7 +403,7 @@ case ${ac_CHROMA} in
yes|no)
;;
*)
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
;;
esac
@ -384,12 +424,65 @@ DX_INIT_DOXYGEN([$PACKAGE_NAME], [doxygen.cfg])
############### Ouput
cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd}
GRID_CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
GRID_LDFLAGS="$AM_LDFLAGS $LDFLAGS"
GRID_LIBS=$LIBS
GRID_SHORT_SHA=`git rev-parse --short HEAD`
GRID_SHA=`git rev-parse HEAD`
GRID_BRANCH=`git rev-parse --abbrev-ref HEAD`
AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS"
AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS"
AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS"
AC_SUBST([AM_CFLAGS])
AC_SUBST([AM_CXXFLAGS])
AC_SUBST([AM_LDFLAGS])
AC_SUBST([GRID_CXXFLAGS])
AC_SUBST([GRID_LDFLAGS])
AC_SUBST([GRID_LIBS])
AC_SUBST([GRID_SHA])
AC_SUBST([GRID_BRANCH])
git_commit=`cd $srcdir && ./scripts/configure.commit`
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Summary of configuration for $PACKAGE v$VERSION
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
----- GIT VERSION -------------------------------------
$git_commit
----- PLATFORM ----------------------------------------
architecture (build) : $build_cpu
os (build) : $build_os
architecture (target) : $target_cpu
os (target) : $target_os
compiler vendor : ${ax_cv_cxx_compiler_vendor}
compiler version : ${ax_cv_gxx_version}
----- BUILD OPTIONS -----------------------------------
SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
Threading : ${ac_openmp}
Communications type : ${comms_type}
Default precision : ${ac_PRECISION}
Software FP16 conversion : ${ac_SFW_FP16}
RNG choice : ${ac_RNG}
GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
LAPACK : ${ac_LAPACK}
FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
LIME (ILDG support) : `if test "x$have_lime" = xtrue; then echo yes; else echo no; fi`
HDF5 : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi`
build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
----- BUILD FLAGS -------------------------------------
CXXFLAGS:
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
LDFLAGS:
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
LIBS:
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
-------------------------------------------------------" > grid.configure.summary
GRID_SUMMARY="`cat grid.configure.summary`"
AM_SUBST_NOTMAKE([GRID_SUMMARY])
AC_SUBST([GRID_SUMMARY])
AC_CONFIG_FILES([grid-config], [chmod +x grid-config])
AC_CONFIG_FILES(Makefile)
AC_CONFIG_FILES(lib/Makefile)
AC_CONFIG_FILES(tests/Makefile)
@ -400,6 +493,7 @@ AC_CONFIG_FILES(tests/forces/Makefile)
AC_CONFIG_FILES(tests/hadrons/Makefile)
AC_CONFIG_FILES(tests/hmc/Makefile)
AC_CONFIG_FILES(tests/solver/Makefile)
AC_CONFIG_FILES(tests/smearing/Makefile)
AC_CONFIG_FILES(tests/qdpxx/Makefile)
AC_CONFIG_FILES(tests/testu01/Makefile)
AC_CONFIG_FILES(benchmarks/Makefile)
@ -407,36 +501,7 @@ AC_CONFIG_FILES(extras/Makefile)
AC_CONFIG_FILES(extras/Hadrons/Makefile)
AC_OUTPUT
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Summary of configuration for $PACKAGE v$VERSION
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
echo ""
cat grid.configure.summary
echo ""
----- PLATFORM ----------------------------------------
architecture (build) : $build_cpu
os (build) : $build_os
architecture (target) : $target_cpu
os (target) : $target_os
compiler vendor : ${ax_cv_cxx_compiler_vendor}
compiler version : ${ax_cv_gxx_version}
----- BUILD OPTIONS -----------------------------------
SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
Threading : ${ac_openmp}
Communications type : ${comms_type}
Default precision : ${ac_PRECISION}
RNG choice : ${ac_RNG}
GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
LAPACK : ${ac_LAPACK}
FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
HDF5 : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi`
build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
----- BUILD FLAGS -------------------------------------
CXXFLAGS:
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
LDFLAGS:
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
LIBS:
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
-------------------------------------------------------" > config.summary
echo ""
cat config.summary
echo ""

View File

@ -162,7 +162,8 @@ void Application::saveParameterFile(const std::string parameterFileName)
sizeString((size)*locVol_) << " (" << sizeString(size) << "/site)"
#define DEFINE_MEMPEAK \
auto memPeak = [this](const std::vector<unsigned int> &program)\
GeneticScheduler<unsigned int>::ObjFunc memPeak = \
[this](const std::vector<unsigned int> &program)\
{\
unsigned int memPeak;\
bool msg;\

View File

@ -145,6 +145,15 @@ std::string typeName(void)
return typeName(typeIdPt<T>());
}
// default writers/readers
#ifdef HAVE_HDF5
typedef Hdf5Reader CorrReader;
typedef Hdf5Writer CorrWriter;
#else
typedef XmlReader CorrReader;
typedef XmlWriter CorrWriter;
#endif
END_HADRONS_NAMESPACE
#endif // Hadrons_Global_hpp_

View File

@ -29,12 +29,20 @@ See the full license in the file "LICENSE" in the top level distribution directo
#include <Grid/Hadrons/Modules/MAction/DWF.hpp>
#include <Grid/Hadrons/Modules/MAction/Wilson.hpp>
#include <Grid/Hadrons/Modules/MContraction/Baryon.hpp>
#include <Grid/Hadrons/Modules/MContraction/DiscLoop.hpp>
#include <Grid/Hadrons/Modules/MContraction/Gamma3pt.hpp>
#include <Grid/Hadrons/Modules/MContraction/Meson.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp>
#include <Grid/Hadrons/Modules/MGauge/Load.hpp>
#include <Grid/Hadrons/Modules/MGauge/Random.hpp>
#include <Grid/Hadrons/Modules/MGauge/Unit.hpp>
#include <Grid/Hadrons/Modules/MLoop/NoiseLoop.hpp>
#include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp>
#include <Grid/Hadrons/Modules/MSource/Point.hpp>
#include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp>
#include <Grid/Hadrons/Modules/MSource/Wall.hpp>
#include <Grid/Hadrons/Modules/MSource/Z2.hpp>
#include <Grid/Hadrons/Modules/Quark.hpp>

View File

@ -112,7 +112,7 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void)
<< " quarks '" << par().q1 << "', '" << par().q2 << "', and '"
<< par().q3 << "'" << std::endl;
XmlWriter writer(par().output);
CorrWriter writer(par().output);
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
PropagatorField3 &q3 = *env().template getObject<PropagatorField3>(par().q2);

View File

@ -0,0 +1,144 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/DiscLoop.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_DiscLoop_hpp_
#define Hadrons_DiscLoop_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* DiscLoop *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
class DiscLoopPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(DiscLoopPar,
std::string, q_loop,
Gamma::Algebra, gamma,
std::string, output);
};
template <typename FImpl>
class TDiscLoop: public Module<DiscLoopPar>
{
TYPE_ALIASES(FImpl,);
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
Gamma::Algebra, gamma,
std::vector<Complex>, corr);
};
public:
// constructor
TDiscLoop(const std::string name);
// destructor
virtual ~TDiscLoop(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(DiscLoop, TDiscLoop<FIMPL>, MContraction);
/******************************************************************************
* TDiscLoop implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TDiscLoop<FImpl>::TDiscLoop(const std::string name)
: Module<DiscLoopPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TDiscLoop<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().q_loop};
return in;
}
template <typename FImpl>
std::vector<std::string> TDiscLoop<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TDiscLoop<FImpl>::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TDiscLoop<FImpl>::execute(void)
{
LOG(Message) << "Computing disconnected loop contraction '" << getName()
<< "' using '" << par().q_loop << "' with " << par().gamma
<< " insertion." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q_loop = *env().template getObject<PropagatorField>(par().q_loop);
LatticeComplex c(env().getGrid());
Gamma gamma(par().gamma);
std::vector<TComplex> buf;
Result result;
c = trace(gamma*q_loop);
sliceSum(c, buf, Tp);
result.gamma = par().gamma;
result.corr.resize(buf.size());
for (unsigned int t = 0; t < buf.size(); ++t)
{
result.corr[t] = TensorRemove(buf[t]);
}
write(writer, "disc", result);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_DiscLoop_hpp_

View File

@ -0,0 +1,170 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/Gamma3pt.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Gamma3pt_hpp_
#define Hadrons_Gamma3pt_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
* 3pt contraction with gamma matrix insertion.
*
* Schematic:
*
* q2 q3
* /----<------*------<----¬
* / gamma \
* / \
* i * * f
* \ /
* \ /
* \----------->----------/
* q1
*
* trace(g5*q1*adj(q2)*g5*gamma*q3)
*/
/******************************************************************************
* Gamma3pt *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
class Gamma3ptPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Gamma3ptPar,
std::string, q1,
std::string, q2,
std::string, q3,
Gamma::Algebra, gamma,
std::string, output);
};
template <typename FImpl1, typename FImpl2, typename FImpl3>
class TGamma3pt: public Module<Gamma3ptPar>
{
TYPE_ALIASES(FImpl1, 1);
TYPE_ALIASES(FImpl2, 2);
TYPE_ALIASES(FImpl3, 3);
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
Gamma::Algebra, gamma,
std::vector<Complex>, corr);
};
public:
// constructor
TGamma3pt(const std::string name);
// destructor
virtual ~TGamma3pt(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(Gamma3pt, ARG(TGamma3pt<FIMPL, FIMPL, FIMPL>), MContraction);
/******************************************************************************
* TGamma3pt implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
TGamma3pt<FImpl1, FImpl2, FImpl3>::TGamma3pt(const std::string name)
: Module<Gamma3ptPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getInput(void)
{
std::vector<std::string> in = {par().q1, par().q2, par().q3};
return in;
}
template <typename FImpl1, typename FImpl2, typename FImpl3>
std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TGamma3pt<FImpl1, FImpl2, FImpl3>::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TGamma3pt<FImpl1, FImpl2, FImpl3>::execute(void)
{
LOG(Message) << "Computing 3pt contractions '" << getName() << "' using"
<< " quarks '" << par().q1 << "', '" << par().q2 << "' and '"
<< par().q3 << "', with " << par().gamma << " insertion."
<< std::endl;
CorrWriter writer(par().output);
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
PropagatorField3 &q3 = *env().template getObject<PropagatorField3>(par().q3);
LatticeComplex c(env().getGrid());
Gamma g5(Gamma::Algebra::Gamma5);
Gamma gamma(par().gamma);
std::vector<TComplex> buf;
Result result;
c = trace(g5*q1*adj(q2)*(g5*gamma)*q3);
sliceSum(c, buf, Tp);
result.gamma = par().gamma;
result.corr.resize(buf.size());
for (unsigned int t = 0; t < buf.size(); ++t)
{
result.corr[t] = TensorRemove(buf[t]);
}
write(writer, "gamma3pt", result);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Gamma3pt_hpp_

View File

@ -6,8 +6,10 @@ Source file: extras/Hadrons/Modules/MContraction/Meson.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2017
Author: Antonin Portelli <antonin.portelli@me.com>
Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -36,20 +38,39 @@ See the full license in the file "LICENSE" in the top level distribution directo
BEGIN_HADRONS_NAMESPACE
/*
Meson contractions
-----------------------------
* options:
- q1: input propagator 1 (string)
- q2: input propagator 2 (string)
- gammas: gamma products to insert at sink & source, pairs of gamma matrices
(space-separated strings) in angled brackets (i.e. <g_sink g_src>),
in a sequence (e.g. "<Gamma5 Gamma5><Gamma5 GammaT>").
Special values: "all" - perform all possible contractions.
- mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0."),
given as multiples of (2*pi) / L.
*/
/******************************************************************************
* TMeson *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
typedef std::pair<Gamma::Algebra, Gamma::Algebra> GammaPair;
class MesonPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar,
std::string, q1,
std::string, q2,
std::string, output,
Gamma::Algebra, gammaSource,
Gamma::Algebra, gammaSink);
std::string, q1,
std::string, q2,
std::string, gammas,
std::string, mom,
std::string, output);
};
template <typename FImpl1, typename FImpl2>
@ -61,7 +82,10 @@ public:
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result, std::vector<Complex>, corr);
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
Gamma::Algebra, gamma_snk,
Gamma::Algebra, gamma_src,
std::vector<Complex>, corr);
};
public:
// constructor
@ -71,6 +95,7 @@ public:
// dependencies/products
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
virtual void parseGammaString(std::vector<GammaPair> &gammaList);
// execution
virtual void execute(void);
};
@ -103,6 +128,32 @@ std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void)
return output;
}
template <typename FImpl1, typename FImpl2>
void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList)
{
// Determine gamma matrices to insert at source/sink.
if (par().gammas.compare("all") == 0)
{
// Do all contractions.
unsigned int n_gam = Ns * Ns;
gammaList.resize(n_gam*n_gam);
for (unsigned int i = 1; i < Gamma::nGamma; i += 2)
{
for (unsigned int j = 1; j < Gamma::nGamma; j += 2)
{
gammaList.push_back(std::make_pair((Gamma::Algebra)i,
(Gamma::Algebra)j));
}
}
}
else
{
// Parse individual contractions from input string.
gammaList = strToVec<GammaPair>(par().gammas);
}
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2>
void TMeson<FImpl1, FImpl2>::execute(void)
@ -111,21 +162,44 @@ void TMeson<FImpl1, FImpl2>::execute(void)
<< " quarks '" << par().q1 << "' and '" << par().q2 << "'"
<< std::endl;
XmlWriter writer(par().output);
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
LatticeComplex c(env().getGrid());
Gamma gSrc(par().gammaSource), gSnk(par().gammaSink);
Gamma g5(Gamma::Algebra::Gamma5);
std::vector<TComplex> buf;
Result result;
c = trace(gSnk*q1*adj(gSrc)*g5*adj(q2)*g5);
sliceSum(c, buf, Tp);
result.corr.resize(buf.size());
for (unsigned int t = 0; t < buf.size(); ++t)
CorrWriter writer(par().output);
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
LatticeComplex c(env().getGrid());
Gamma g5(Gamma::Algebra::Gamma5);
std::vector<GammaPair> gammaList;
std::vector<TComplex> buf;
std::vector<Result> result;
std::vector<Real> p;
p = strToVec<Real>(par().mom);
LatticeComplex ph(env().getGrid()), coor(env().getGrid());
Complex i(0.0,1.0);
ph = zero;
for(unsigned int mu = 0; mu < env().getNd(); mu++)
{
result.corr[t] = TensorRemove(buf[t]);
LatticeCoordinate(coor, mu);
ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu])));
}
ph = exp((Real)(2*M_PI)*i*ph);
parseGammaString(gammaList);
result.resize(gammaList.size());
for (unsigned int i = 0; i < result.size(); ++i)
{
Gamma gSnk(gammaList[i].first);
Gamma gSrc(gammaList[i].second);
c = trace((g5*gSnk)*q1*(adj(gSrc)*g5)*adj(q2))*ph;
sliceSum(c, buf, Tp);
result[i].gamma_snk = gammaList[i].first;
result[i].gamma_src = gammaList[i].second;
result[i].corr.resize(buf.size());
for (unsigned int t = 0; t < buf.size(); ++t)
{
result[i].corr[t] = TensorRemove(buf[t]);
}
}
write(writer, "meson", result);
}

View File

@ -0,0 +1,114 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_WeakHamiltonian_hpp_
#define Hadrons_WeakHamiltonian_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* WeakHamiltonian *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
/*******************************************************************************
* Utilities for contractions involving the Weak Hamiltonian.
******************************************************************************/
//// Sum and store correlator.
#define MAKE_DIAG(exp, buf, res, n)\
sliceSum(exp, buf, Tp);\
res.name = (n);\
res.corr.resize(buf.size());\
for (unsigned int t = 0; t < buf.size(); ++t)\
{\
res.corr[t] = TensorRemove(buf[t]);\
}
//// Contraction of mu index: use 'mu' variable in exp.
#define SUM_MU(buf,exp)\
buf = zero;\
for (unsigned int mu = 0; mu < ndim; ++mu)\
{\
buf += exp;\
}
enum
{
i_V = 0,
i_A = 1,
n_i = 2
};
class WeakHamiltonianPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(WeakHamiltonianPar,
std::string, q1,
std::string, q2,
std::string, q3,
std::string, q4,
std::string, output);
};
#define MAKE_WEAK_MODULE(modname)\
class T##modname: public Module<WeakHamiltonianPar>\
{\
public:\
TYPE_ALIASES(FIMPL,)\
class Result: Serializable\
{\
public:\
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,\
std::string, name,\
std::vector<Complex>, corr);\
};\
public:\
/* constructor */ \
T##modname(const std::string name);\
/* destructor */ \
virtual ~T##modname(void) = default;\
/* dependency relation */ \
virtual std::vector<std::string> getInput(void);\
virtual std::vector<std::string> getOutput(void);\
/* setup */ \
virtual void setup(void);\
/* execution */ \
virtual void execute(void);\
std::vector<std::string> VA_label = {"V", "A"};\
};\
MODULE_REGISTER_NS(modname, T##modname, MContraction);
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_WeakHamiltonian_hpp_

View File

@ -0,0 +1,137 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MContraction;
/*
* Weak Hamiltonian current-current contractions, Eye-type.
*
* These contractions are generated by the Q1 and Q2 operators in the physical
* basis (see e.g. Fig 3 of arXiv:1507.03094).
*
* Schematics: q4 |
* /-<-¬ |
* / \ | q2 q3
* \ / | /----<------*------<----¬
* q2 \ / q3 | / /-*-¬ \
* /-----<-----* *-----<----¬ | / / \ \
* i * H_W * f | i * \ / q4 * f
* \ / | \ \->-/ /
* \ / | \ /
* \---------->---------/ | \----------->----------/
* q1 | q1
* |
* Saucer (S) | Eye (E)
*
* S: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1]*q4*gL[mu][p_2])
* E: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1])*trace(q4*gL[mu][p_2])
*/
/******************************************************************************
* TWeakHamiltonianEye implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TWeakHamiltonianEye::TWeakHamiltonianEye(const std::string name)
: Module<WeakHamiltonianPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TWeakHamiltonianEye::getInput(void)
{
std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4};
return in;
}
std::vector<std::string> TWeakHamiltonianEye::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void TWeakHamiltonianEye::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
void TWeakHamiltonianEye::execute(void)
{
LOG(Message) << "Computing Weak Hamiltonian (Eye type) contractions '"
<< getName() << "' using quarks '" << par().q1 << "', '"
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
<< "'." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
LatticeComplex expbuf(env().getGrid());
std::vector<TComplex> corrbuf;
std::vector<Result> result(n_eye_diag);
unsigned int ndim = env().getNd();
PropagatorField tmp1(env().getGrid());
LatticeComplex tmp2(env().getGrid());
std::vector<PropagatorField> S_body(ndim, tmp1);
std::vector<PropagatorField> S_loop(ndim, tmp1);
std::vector<LatticeComplex> E_body(ndim, tmp2);
std::vector<LatticeComplex> E_loop(ndim, tmp2);
// Setup for S-type contractions.
for (int mu = 0; mu < ndim; ++mu)
{
S_body[mu] = MAKE_SE_BODY(q1, q2, q3, GammaL(Gamma::gmu[mu]));
S_loop[mu] = MAKE_SE_LOOP(q4, GammaL(Gamma::gmu[mu]));
}
// Perform S-type contractions.
SUM_MU(expbuf, trace(S_body[mu]*S_loop[mu]))
MAKE_DIAG(expbuf, corrbuf, result[S_diag], "HW_S")
// Recycle sub-expressions for E-type contractions.
for (unsigned int mu = 0; mu < ndim; ++mu)
{
E_body[mu] = trace(S_body[mu]);
E_loop[mu] = trace(S_loop[mu]);
}
// Perform E-type contractions.
SUM_MU(expbuf, E_body[mu]*E_loop[mu])
MAKE_DIAG(expbuf, corrbuf, result[E_diag], "HW_E")
write(writer, "HW_Eye", result);
}

View File

@ -0,0 +1,58 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_WeakHamiltonianEye_hpp_
#define Hadrons_WeakHamiltonianEye_hpp_
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* WeakHamiltonianEye *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
enum
{
S_diag = 0,
E_diag = 1,
n_eye_diag = 2
};
// Saucer and Eye subdiagram contractions.
#define MAKE_SE_BODY(Q_1, Q_2, Q_3, gamma) (Q_3*g5*Q_1*adj(Q_2)*g5*gamma)
#define MAKE_SE_LOOP(Q_loop, gamma) (Q_loop*gamma)
MAKE_WEAK_MODULE(WeakHamiltonianEye)
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_WeakHamiltonianEye_hpp_

View File

@ -0,0 +1,139 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MContraction;
/*
* Weak Hamiltonian current-current contractions, Non-Eye-type.
*
* These contractions are generated by the Q1 and Q2 operators in the physical
* basis (see e.g. Fig 3 of arXiv:1507.03094).
*
* Schematic:
* q2 q3 | q2 q3
* /--<--¬ /--<--¬ | /--<--¬ /--<--¬
* / \ / \ | / \ / \
* / \ / \ | / \ / \
* / \ / \ | / \ / \
* i * * H_W * f | i * * * H_W * f
* \ * | | \ / \ /
* \ / \ / | \ / \ /
* \ / \ / | \ / \ /
* \ / \ / | \-->--/ \-->--/
* \-->--/ \-->--/ | q1 q4
* q1 q4 |
* Connected (C) | Wing (W)
*
* C: trace(q1*adj(q2)*g5*gL[mu]*q3*adj(q4)*g5*gL[mu])
* W: trace(q1*adj(q2)*g5*gL[mu])*trace(q3*adj(q4)*g5*gL[mu])
*
*/
/******************************************************************************
* TWeakHamiltonianNonEye implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TWeakHamiltonianNonEye::TWeakHamiltonianNonEye(const std::string name)
: Module<WeakHamiltonianPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TWeakHamiltonianNonEye::getInput(void)
{
std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4};
return in;
}
std::vector<std::string> TWeakHamiltonianNonEye::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void TWeakHamiltonianNonEye::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
void TWeakHamiltonianNonEye::execute(void)
{
LOG(Message) << "Computing Weak Hamiltonian (Non-Eye type) contractions '"
<< getName() << "' using quarks '" << par().q1 << "', '"
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
<< "'." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
LatticeComplex expbuf(env().getGrid());
std::vector<TComplex> corrbuf;
std::vector<Result> result(n_noneye_diag);
unsigned int ndim = env().getNd();
PropagatorField tmp1(env().getGrid());
LatticeComplex tmp2(env().getGrid());
std::vector<PropagatorField> C_i_side_loop(ndim, tmp1);
std::vector<PropagatorField> C_f_side_loop(ndim, tmp1);
std::vector<LatticeComplex> W_i_side_loop(ndim, tmp2);
std::vector<LatticeComplex> W_f_side_loop(ndim, tmp2);
// Setup for C-type contractions.
for (int mu = 0; mu < ndim; ++mu)
{
C_i_side_loop[mu] = MAKE_CW_SUBDIAG(q1, q2, GammaL(Gamma::gmu[mu]));
C_f_side_loop[mu] = MAKE_CW_SUBDIAG(q3, q4, GammaL(Gamma::gmu[mu]));
}
// Perform C-type contractions.
SUM_MU(expbuf, trace(C_i_side_loop[mu]*C_f_side_loop[mu]))
MAKE_DIAG(expbuf, corrbuf, result[C_diag], "HW_C")
// Recycle sub-expressions for W-type contractions.
for (unsigned int mu = 0; mu < ndim; ++mu)
{
W_i_side_loop[mu] = trace(C_i_side_loop[mu]);
W_f_side_loop[mu] = trace(C_f_side_loop[mu]);
}
// Perform W-type contractions.
SUM_MU(expbuf, W_i_side_loop[mu]*W_f_side_loop[mu])
MAKE_DIAG(expbuf, corrbuf, result[W_diag], "HW_W")
write(writer, "HW_NonEye", result);
}

View File

@ -0,0 +1,57 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_WeakHamiltonianNonEye_hpp_
#define Hadrons_WeakHamiltonianNonEye_hpp_
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* WeakHamiltonianNonEye *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
enum
{
W_diag = 0,
C_diag = 1,
n_noneye_diag = 2
};
// Wing and Connected subdiagram contractions
#define MAKE_CW_SUBDIAG(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma)
MAKE_WEAK_MODULE(WeakHamiltonianNonEye)
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_WeakHamiltonianNonEye_hpp_

View File

@ -0,0 +1,135 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MContraction;
/*
* Weak Hamiltonian + current contractions, disconnected topology for neutral
* mesons.
*
* These contractions are generated by operators Q_1,...,10 of the dS=1 Weak
* Hamiltonian in the physical basis and an additional current J (see e.g.
* Fig 11 of arXiv:1507.03094).
*
* Schematic:
*
* q2 q4 q3
* /--<--¬ /---<--¬ /---<--¬
* / \ / \ / \
* i * * H_W | J * * f
* \ / \ / \ /
* \--->---/ \-------/ \------/
* q1
*
* options
* - q1: input propagator 1 (string)
* - q2: input propagator 2 (string)
* - q3: input propagator 3 (string), assumed to be sequential propagator
* - q4: input propagator 4 (string), assumed to be a loop
*
* type 1: trace(q1*adj(q2)*g5*gL[mu])*trace(loop*gL[mu])*trace(q3*g5)
* type 2: trace(q1*adj(q2)*g5*gL[mu]*loop*gL[mu])*trace(q3*g5)
*/
/*******************************************************************************
* TWeakNeutral4ptDisc implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TWeakNeutral4ptDisc::TWeakNeutral4ptDisc(const std::string name)
: Module<WeakHamiltonianPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TWeakNeutral4ptDisc::getInput(void)
{
std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4};
return in;
}
std::vector<std::string> TWeakNeutral4ptDisc::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void TWeakNeutral4ptDisc::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
void TWeakNeutral4ptDisc::execute(void)
{
LOG(Message) << "Computing Weak Hamiltonian neutral disconnected contractions '"
<< getName() << "' using quarks '" << par().q1 << "', '"
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
<< "'." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
LatticeComplex expbuf(env().getGrid());
std::vector<TComplex> corrbuf;
std::vector<Result> result(n_neut_disc_diag);
unsigned int ndim = env().getNd();
PropagatorField tmp(env().getGrid());
std::vector<PropagatorField> meson(ndim, tmp);
std::vector<PropagatorField> loop(ndim, tmp);
LatticeComplex curr(env().getGrid());
// Setup for type 1 contractions.
for (int mu = 0; mu < ndim; ++mu)
{
meson[mu] = MAKE_DISC_MESON(q1, q2, GammaL(Gamma::gmu[mu]));
loop[mu] = MAKE_DISC_LOOP(q4, GammaL(Gamma::gmu[mu]));
}
curr = MAKE_DISC_CURR(q3, GammaL(Gamma::Algebra::Gamma5));
// Perform type 1 contractions.
SUM_MU(expbuf, trace(meson[mu]*loop[mu]))
expbuf *= curr;
MAKE_DIAG(expbuf, corrbuf, result[neut_disc_1_diag], "HW_disc0_1")
// Perform type 2 contractions.
SUM_MU(expbuf, trace(meson[mu])*trace(loop[mu]))
expbuf *= curr;
MAKE_DIAG(expbuf, corrbuf, result[neut_disc_2_diag], "HW_disc0_2")
write(writer, "HW_disc0", result);
}

View File

@ -0,0 +1,59 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_WeakNeutral4ptDisc_hpp_
#define Hadrons_WeakNeutral4ptDisc_hpp_
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* WeakNeutral4ptDisc *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
enum
{
neut_disc_1_diag = 0,
neut_disc_2_diag = 1,
n_neut_disc_diag = 2
};
// Neutral 4pt disconnected subdiagram contractions.
#define MAKE_DISC_MESON(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma)
#define MAKE_DISC_LOOP(Q_LOOP, gamma) (Q_LOOP*gamma)
#define MAKE_DISC_CURR(Q_c, gamma) (trace(Q_c*gamma))
MAKE_WEAK_MODULE(WeakNeutral4ptDisc)
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_WeakNeutral4ptDisc_hpp_

View File

@ -0,0 +1,132 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MLoop/NoiseLoop.hpp
Copyright (C) 2016
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_NoiseLoop_hpp_
#define Hadrons_NoiseLoop_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
Noise loop propagator
-----------------------------
* loop_x = q_x * adj(eta_x)
* options:
- q = Result of inversion on noise source.
- eta = noise source.
*/
/******************************************************************************
* NoiseLoop *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MLoop)
class NoiseLoopPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(NoiseLoopPar,
std::string, q,
std::string, eta);
};
template <typename FImpl>
class TNoiseLoop: public Module<NoiseLoopPar>
{
public:
TYPE_ALIASES(FImpl,);
public:
// constructor
TNoiseLoop(const std::string name);
// destructor
virtual ~TNoiseLoop(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(NoiseLoop, TNoiseLoop<FIMPL>, MLoop);
/******************************************************************************
* TNoiseLoop implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TNoiseLoop<FImpl>::TNoiseLoop(const std::string name)
: Module<NoiseLoopPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TNoiseLoop<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().q, par().eta};
return in;
}
template <typename FImpl>
std::vector<std::string> TNoiseLoop<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TNoiseLoop<FImpl>::setup(void)
{
env().template registerLattice<PropagatorField>(getName());
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TNoiseLoop<FImpl>::execute(void)
{
PropagatorField &loop = *env().template createLattice<PropagatorField>(getName());
PropagatorField &q = *env().template getObject<PropagatorField>(par().q);
PropagatorField &eta = *env().template getObject<PropagatorField>(par().eta);
loop = q*adj(eta);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_NoiseLoop_hpp_

View File

@ -6,6 +6,7 @@ Source file: extras/Hadrons/Modules/MSource/SeqGamma.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2017
Author: Antonin Portelli <antonin.portelli@me.com>
@ -149,9 +150,9 @@ void TSeqGamma<FImpl>::execute(void)
for(unsigned int mu = 0; mu < env().getNd(); mu++)
{
LatticeCoordinate(coor, mu);
ph = ph + p[mu]*coor;
ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu])));
}
ph = exp(i*ph);
ph = exp((Real)(2*M_PI)*i*ph);
LatticeCoordinate(t, Tp);
src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q);
}

View File

@ -0,0 +1,147 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MSource/Wall.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_WallSource_hpp_
#define Hadrons_WallSource_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
Wall source
-----------------------------
* src_x = delta(x_3 - tW) * exp(i x.mom)
* options:
- tW: source timeslice (integer)
- mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0.")
*/
/******************************************************************************
* Wall *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MSource)
class WallPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(WallPar,
unsigned int, tW,
std::string, mom);
};
template <typename FImpl>
class TWall: public Module<WallPar>
{
public:
TYPE_ALIASES(FImpl,);
public:
// constructor
TWall(const std::string name);
// destructor
virtual ~TWall(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(Wall, TWall<FIMPL>, MSource);
/******************************************************************************
* TWall implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TWall<FImpl>::TWall(const std::string name)
: Module<WallPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TWall<FImpl>::getInput(void)
{
std::vector<std::string> in;
return in;
}
template <typename FImpl>
std::vector<std::string> TWall<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TWall<FImpl>::setup(void)
{
env().template registerLattice<PropagatorField>(getName());
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TWall<FImpl>::execute(void)
{
LOG(Message) << "Generating wall source at t = " << par().tW
<< " with momentum " << par().mom << std::endl;
PropagatorField &src = *env().template createLattice<PropagatorField>(getName());
Lattice<iScalar<vInteger>> t(env().getGrid());
LatticeComplex ph(env().getGrid()), coor(env().getGrid());
std::vector<Real> p;
Complex i(0.0,1.0);
p = strToVec<Real>(par().mom);
ph = zero;
for(unsigned int mu = 0; mu < Nd; mu++)
{
LatticeCoordinate(coor, mu);
ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu])));
}
ph = exp((Real)(2*M_PI)*i*ph);
LatticeCoordinate(t, Tp);
src = 1.;
src = where((t == par().tW), src*ph, 0.*src);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_WallSource_hpp_

View File

@ -173,7 +173,7 @@ void TQuark<FImpl>::execute(void)
*env().template getObject<PropagatorField>(getName());
axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0);
axpby_ssp_pplus(sol, 0., sol, 1., sol, 0, Ls_-1);
axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1);
ExtractSlice(tmp, sol, 0, 0);
FermToProp(p4d, tmp, s, c);
}

View File

@ -1,4 +1,7 @@
modules_cc =\
Modules/MContraction/WeakHamiltonianEye.cc \
Modules/MContraction/WeakHamiltonianNonEye.cc \
Modules/MContraction/WeakNeutral4ptDisc.cc \
Modules/MGauge/Load.cc \
Modules/MGauge/Random.cc \
Modules/MGauge/Unit.cc
@ -7,13 +10,21 @@ modules_hpp =\
Modules/MAction/DWF.hpp \
Modules/MAction/Wilson.hpp \
Modules/MContraction/Baryon.hpp \
Modules/MContraction/DiscLoop.hpp \
Modules/MContraction/Gamma3pt.hpp \
Modules/MContraction/Meson.hpp \
Modules/MContraction/WeakHamiltonian.hpp \
Modules/MContraction/WeakHamiltonianEye.hpp \
Modules/MContraction/WeakHamiltonianNonEye.hpp \
Modules/MContraction/WeakNeutral4ptDisc.hpp \
Modules/MGauge/Load.hpp \
Modules/MGauge/Random.hpp \
Modules/MGauge/Unit.hpp \
Modules/MLoop/NoiseLoop.hpp \
Modules/MSolver/RBPrecCG.hpp \
Modules/MSource/Point.hpp \
Modules/MSource/SeqGamma.hpp \
Modules/MSource/Wall.hpp \
Modules/MSource/Z2.hpp \
Modules/Quark.hpp

View File

@ -20,4 +20,17 @@ The simple testcase in this directory is the submitted bug report that encapsula
problem. The test case works with icpc and with clang++, but fails consistently on g++
current variants.
Peter
Peter
************
Second GCC bug reported, see Issue 100.
https://wandbox.org/permlink/tzssJza6R9XnqANw
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80652
Getting Travis fails under gcc-5 for Test_simd, now that I added more comprehensive testing to the
CI test suite. The limitations of Travis runtime limits & weak cores are being shown.
Travis uses 5.4.1 for g++-5.

86
grid-config.in Executable file
View File

@ -0,0 +1,86 @@
#! /bin/sh
prefix=@prefix@
exec_prefix=@exec_prefix@
includedir=@includedir@
usage()
{
cat <<EOF
Usage: grid-config [OPTION]
Known values for OPTION are:
--prefix show Grid installation prefix
--cxxflags print pre-processor and compiler flags
--ldflags print library linking flags
--libs print library linking information
--summary print full build summary
--help display this help and exit
--version output version information
--git print git revision
EOF
exit $1
}
if test $# -eq 0; then
usage 1
fi
cflags=false
libs=false
while test $# -gt 0; do
case "$1" in
-*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
*) optarg= ;;
esac
case "$1" in
--prefix)
echo $prefix
;;
--version)
echo @VERSION@
exit 0
;;
--git)
echo "@GRID_BRANCH@ @GRID_SHA@"
exit 0
;;
--help)
usage 0
;;
--cxxflags)
echo @GRID_CXXFLAGS@
;;
--ldflags)
echo @GRID_LDFLAGS@
;;
--libs)
echo @GRID_LIBS@
;;
--summary)
echo ""
echo "@GRID_SUMMARY@"
echo ""
;;
*)
usage
exit 1
;;
esac
shift
done
exit 0

37
lib/DisableWarnings.h Normal file
View File

@ -0,0 +1,37 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/DisableWarnings.h
Copyright (C) 2016
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef DISABLE_WARNINGS_H
#define DISABLE_WARNINGS_H
//disables and intel compiler specific warning (in json.hpp)
#pragma warning disable 488
#endif

View File

@ -38,28 +38,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_BASE_H
#define GRID_BASE_H
///////////////////
// Std C++ dependencies
///////////////////
#include <cassert>
#include <complex>
#include <vector>
#include <iostream>
#include <iomanip>
#include <random>
#include <functional>
#include <stdio.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <ctime>
#include <sys/time.h>
#include <chrono>
///////////////////
// Grid headers
///////////////////
#include "Config.h"
#include <Grid/GridStd.h>
#include <Grid/perfmon/Timer.h>
#include <Grid/perfmon/PerfCount.h>

27
lib/GridStd.h Normal file
View File

@ -0,0 +1,27 @@
#ifndef GRID_STD_H
#define GRID_STD_H
///////////////////
// Std C++ dependencies
///////////////////
#include <cassert>
#include <complex>
#include <vector>
#include <iostream>
#include <iomanip>
#include <random>
#include <functional>
#include <stdio.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <ctime>
#include <sys/time.h>
#include <chrono>
///////////////////
// Grid config
///////////////////
#include "Config.h"
#endif /* GRID_STD_H */

9
lib/Grid_Eigen_Dense.h Normal file
View File

@ -0,0 +1,9 @@
#pragma once
#if defined __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
#include <Grid/Eigen/Dense>
#if defined __GNUC__
#pragma GCC diagnostic pop
#endif

View File

@ -46,7 +46,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
// Lanczos support
#include <Grid/algorithms/iterative/MatrixUtils.h>
//#include <Grid/algorithms/iterative/MatrixUtils.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/CoarsenedMatrix.h>
#include <Grid/algorithms/FFT.h>

View File

@ -235,7 +235,7 @@ namespace Grid {
Field tmp(in._grid);
_Mat.MeooeDag(in,tmp);
_Mat.MooeeInvDag(tmp,out);
_Mat.MooeeInvDag(tmp,out);
_Mat.MeooeDag(out,tmp);
_Mat.MooeeDag(in,out);

View File

@ -197,8 +197,9 @@ namespace Grid {
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
GridBase *grid=in._grid;
//std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl;
//<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
// std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl;
//std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
int vol=grid->gSites();

View File

@ -16,7 +16,7 @@
#define INCLUDED_ALG_REMEZ_H
#include <stddef.h>
#include <Config.h>
#include <Grid/GridStd.h>
#ifdef HAVE_LIBGMP
#include "bigfloat.h"

View File

@ -0,0 +1,366 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h
Copyright (C) 2017
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H
#define GRID_BLOCK_CONJUGATE_GRADIENT_H
namespace Grid {
//////////////////////////////////////////////////////////////////////////
// Block conjugate gradient. Dimension zero should be the block direction
//////////////////////////////////////////////////////////////////////////
template <class Field>
class BlockConjugateGradient : public OperatorFunction<Field> {
public:
typedef typename Field::scalar_type scomplex;
const int blockDim = 0;
int Nblock;
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
// Defaults true.
RealD Tolerance;
Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
BlockConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
: Tolerance(tol),
MaxIterations(maxit),
ErrorOnNoConverge(err_on_no_conv){};
void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
int Orthog = 0; // First dimension is block dim
Nblock = Src._grid->_fdimensions[Orthog];
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
Psi.checkerboard = Src.checkerboard;
conformable(Psi, Src);
Field P(Src);
Field AP(Src);
Field R(Src);
Eigen::MatrixXcd m_pAp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_pAp_inv= Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_rr_inv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_alpha = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_beta = Eigen::MatrixXcd::Zero(Nblock,Nblock);
// Initial residual computation & set up
std::vector<RealD> residuals(Nblock);
std::vector<RealD> ssq(Nblock);
sliceNorm(ssq,Src,Orthog);
RealD sssum=0;
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
sliceNorm(residuals,Src,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
sliceNorm(residuals,Psi,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
// Initial search dir is guess
Linop.HermOp(Psi, AP);
/************************************************************************
* Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980)
************************************************************************
* O'Leary : R = B - A X
* O'Leary : P = M R ; preconditioner M = 1
* O'Leary : alpha = PAP^{-1} RMR
* O'Leary : beta = RMR^{-1}_old RMR_new
* O'Leary : X=X+Palpha
* O'Leary : R_new=R_old-AP alpha
* O'Leary : P=MR_new+P beta
*/
R = Src - AP;
P = R;
sliceInnerProductMatrix(m_rr,R,R,Orthog);
GridStopWatch sliceInnerTimer;
GridStopWatch sliceMaddTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
RealD rrsum=0;
for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b));
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
<<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
MatrixTimer.Start();
Linop.HermOp(P, AP);
MatrixTimer.Stop();
// Alpha
sliceInnerTimer.Start();
sliceInnerProductMatrix(m_pAp,P,AP,Orthog);
sliceInnerTimer.Stop();
m_pAp_inv = m_pAp.inverse();
m_alpha = m_pAp_inv * m_rr ;
// Psi, R update
sliceMaddTimer.Start();
sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog); // add alpha * P to psi
sliceMaddMatrix(R ,m_alpha,AP, R,Orthog,-1.0);// sub alpha * AP to resid
sliceMaddTimer.Stop();
// Beta
m_rr_inv = m_rr.inverse();
sliceInnerTimer.Start();
sliceInnerProductMatrix(m_rr,R,R,Orthog);
sliceInnerTimer.Stop();
m_beta = m_rr_inv *m_rr;
// Search update
sliceMaddTimer.Start();
sliceMaddMatrix(AP,m_beta,P,R,Orthog);
sliceMaddTimer.Stop();
P= AP;
/*********************
* convergence monitor
*********************
*/
RealD max_resid=0;
for(int b=0;b<Nblock;b++){
RealD rr = real(m_rr(b,b))/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}
if ( max_resid < Tolerance*Tolerance ) {
SolverTimer.Stop();
std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tblock "<<b<<" resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
Linop.HermOp(Psi, AP);
AP = AP-Src;
std::cout << GridLogMessage <<"\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
};
//////////////////////////////////////////////////////////////////////////
// multiRHS conjugate gradient. Dimension zero should be the block direction
//////////////////////////////////////////////////////////////////////////
template <class Field>
class MultiRHSConjugateGradient : public OperatorFunction<Field> {
public:
typedef typename Field::scalar_type scomplex;
const int blockDim = 0;
int Nblock;
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
// Defaults true.
RealD Tolerance;
Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
MultiRHSConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
: Tolerance(tol),
MaxIterations(maxit),
ErrorOnNoConverge(err_on_no_conv){};
void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
int Orthog = 0; // First dimension is block dim
Nblock = Src._grid->_fdimensions[Orthog];
std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
Psi.checkerboard = Src.checkerboard;
conformable(Psi, Src);
Field P(Src);
Field AP(Src);
Field R(Src);
std::vector<ComplexD> v_pAp(Nblock);
std::vector<RealD> v_rr (Nblock);
std::vector<RealD> v_rr_inv(Nblock);
std::vector<RealD> v_alpha(Nblock);
std::vector<RealD> v_beta(Nblock);
// Initial residual computation & set up
std::vector<RealD> residuals(Nblock);
std::vector<RealD> ssq(Nblock);
sliceNorm(ssq,Src,Orthog);
RealD sssum=0;
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
sliceNorm(residuals,Src,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
sliceNorm(residuals,Psi,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
// Initial search dir is guess
Linop.HermOp(Psi, AP);
R = Src - AP;
P = R;
sliceNorm(v_rr,R,Orthog);
GridStopWatch sliceInnerTimer;
GridStopWatch sliceMaddTimer;
GridStopWatch sliceNormTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
RealD rrsum=0;
for(int b=0;b<Nblock;b++) rrsum+=real(v_rr[b]);
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
<<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
MatrixTimer.Start();
Linop.HermOp(P, AP);
MatrixTimer.Stop();
// Alpha
// sliceInnerProductVectorTest(v_pAp_test,P,AP,Orthog);
sliceInnerTimer.Start();
sliceInnerProductVector(v_pAp,P,AP,Orthog);
sliceInnerTimer.Stop();
for(int b=0;b<Nblock;b++){
// std::cout << " "<< v_pAp[b]<<" "<< v_pAp_test[b]<<std::endl;
v_alpha[b] = v_rr[b]/real(v_pAp[b]);
}
// Psi, R update
sliceMaddTimer.Start();
sliceMaddVector(Psi,v_alpha, P,Psi,Orthog); // add alpha * P to psi
sliceMaddVector(R ,v_alpha,AP, R,Orthog,-1.0);// sub alpha * AP to resid
sliceMaddTimer.Stop();
// Beta
for(int b=0;b<Nblock;b++){
v_rr_inv[b] = 1.0/v_rr[b];
}
sliceNormTimer.Start();
sliceNorm(v_rr,R,Orthog);
sliceNormTimer.Stop();
for(int b=0;b<Nblock;b++){
v_beta[b] = v_rr_inv[b] *v_rr[b];
}
// Search update
sliceMaddTimer.Start();
sliceMaddVector(P,v_beta,P,R,Orthog);
sliceMaddTimer.Stop();
/*********************
* convergence monitor
*********************
*/
RealD max_resid=0;
for(int b=0;b<Nblock;b++){
RealD rr = v_rr[b]/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}
if ( max_resid < Tolerance*Tolerance ) {
SolverTimer.Stop();
std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tBlock "<<b<<" resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
Linop.HermOp(Psi, AP);
AP = AP-Src;
std::cout <<GridLogMessage << "\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tNorm " << sliceNormTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "MultiRHSConjugateGradient did NOT converge" << std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
};
}
#endif

View File

@ -78,18 +78,12 @@ class ConjugateGradient : public OperatorFunction<Field> {
cp = a;
ssq = norm2(src);
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: guess " << guess << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: src " << ssq << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: mp " << d << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: mmp " << b << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: cp,r " << cp << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: p " << a << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: guess " << guess << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: src " << ssq << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: mp " << d << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: mmp " << b << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: cp,r " << cp << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: p " << a << std::endl;
RealD rsq = Tolerance * Tolerance * ssq;
@ -99,8 +93,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
}
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq
<< std::endl;
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
GridStopWatch LinalgTimer;
GridStopWatch MatrixTimer;
@ -130,8 +123,11 @@ class ConjugateGradient : public OperatorFunction<Field> {
p = p * b + r;
LinalgTimer.Stop();
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
<< " residual " << cp << " target " << rsq << std::endl;
std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;
// Stopping condition
if (cp <= rsq) {
@ -139,32 +135,33 @@ class ConjugateGradient : public OperatorFunction<Field> {
Linop.HermOpAndNorm(psi, mmp, d, qq);
p = mmp - src;
RealD mmpnorm = sqrt(norm2(mmp));
RealD psinorm = sqrt(norm2(psi));
RealD srcnorm = sqrt(norm2(src));
RealD resnorm = sqrt(norm2(p));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage
<< "ConjugateGradient: Converged on iteration " << k << std::endl;
std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual << " target "
<< Tolerance << std::endl;
std::cout << GridLogMessage << "Time elapsed: Iterations "
<< SolverTimer.Elapsed() << " Matrix "
<< MatrixTimer.Elapsed() << " Linalg "
<< LinalgTimer.Elapsed();
std::cout << std::endl;
std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl;
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
std::cout << GridLogMessage << "Time breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
<< std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
};
}

View File

@ -30,6 +30,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define GRID_IRL_H
#include <string.h> //memset
#ifdef USE_LAPACK
void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
double *vl, double *vu, int *il, int *iu, double *abstol,
@ -37,8 +38,9 @@ void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
double *work, int *lwork, int *iwork, int *liwork,
int *info);
#endif
#include "DenseMatrix.h"
#include "EigenSort.h"
#include <Grid/algorithms/densematrix/DenseMatrix.h>
#include <Grid/algorithms/iterative/EigenSort.h>
namespace Grid {
@ -1088,8 +1090,6 @@ static void Lock(DenseMatrix<T> &H, // Hess mtx
int dfg,
bool herm)
{
//ForceTridiagonal(H);
int M = H.dim;
@ -1121,7 +1121,6 @@ static void Lock(DenseMatrix<T> &H, // Hess mtx
AH = Hermitian(QQ)*AH;
AH = AH*QQ;
for(int i=con;i<M;i++){
for(int j=con;j<M;j++){

View File

@ -1,453 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/Matrix.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef MATRIX_H
#define MATRIX_H
#include <cstdlib>
#include <string>
#include <cmath>
#include <vector>
#include <iostream>
#include <iomanip>
#include <complex>
#include <typeinfo>
#include <Grid/Grid.h>
/** Sign function **/
template <class T> T sign(T p){return ( p/abs(p) );}
/////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////// Hijack STL containers for our wicked means /////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////
template<class T> using Vector = Vector<T>;
template<class T> using Matrix = Vector<Vector<T> >;
template<class T> void Resize(Vector<T > & vec, int N) { vec.resize(N); }
template<class T> void Resize(Matrix<T > & mat, int N, int M) {
mat.resize(N);
for(int i=0;i<N;i++){
mat[i].resize(M);
}
}
template<class T> void Size(Vector<T> & vec, int &N)
{
N= vec.size();
}
template<class T> void Size(Matrix<T> & mat, int &N,int &M)
{
N= mat.size();
M= mat[0].size();
}
template<class T> void SizeSquare(Matrix<T> & mat, int &N)
{
int M; Size(mat,N,M);
assert(N==M);
}
template<class T> void SizeSame(Matrix<T> & mat1,Matrix<T> &mat2, int &N1,int &M1)
{
int N2,M2;
Size(mat1,N1,M1);
Size(mat2,N2,M2);
assert(N1==N2);
assert(M1==M2);
}
//*****************************************
//* (Complex) Vector operations *
//*****************************************
/**Conj of a Vector **/
template <class T> Vector<T> conj(Vector<T> p){
Vector<T> q(p.size());
for(int i=0;i<p.size();i++){q[i] = conj(p[i]);}
return q;
}
/** Norm of a Vector**/
template <class T> T norm(Vector<T> p){
T sum = 0;
for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);}
return abs(sqrt(sum));
}
/** Norm squared of a Vector **/
template <class T> T norm2(Vector<T> p){
T sum = 0;
for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);}
return abs((sum));
}
/** Sum elements of a Vector **/
template <class T> T trace(Vector<T> p){
T sum = 0;
for(int i=0;i<p.size();i++){sum = sum + p[i];}
return sum;
}
/** Fill a Vector with constant c **/
template <class T> void Fill(Vector<T> &p, T c){
for(int i=0;i<p.size();i++){p[i] = c;}
}
/** Normalize a Vector **/
template <class T> void normalize(Vector<T> &p){
T m = norm(p);
if( abs(m) > 0.0) for(int i=0;i<p.size();i++){p[i] /= m;}
}
/** Vector by scalar **/
template <class T, class U> Vector<T> times(Vector<T> p, U s){
for(int i=0;i<p.size();i++){p[i] *= s;}
return p;
}
template <class T, class U> Vector<T> times(U s, Vector<T> p){
for(int i=0;i<p.size();i++){p[i] *= s;}
return p;
}
/** inner product of a and b = conj(a) . b **/
template <class T> T inner(Vector<T> a, Vector<T> b){
T m = 0.;
for(int i=0;i<a.size();i++){m = m + conj(a[i])*b[i];}
return m;
}
/** sum of a and b = a + b **/
template <class T> Vector<T> add(Vector<T> a, Vector<T> b){
Vector<T> m(a.size());
for(int i=0;i<a.size();i++){m[i] = a[i] + b[i];}
return m;
}
/** sum of a and b = a - b **/
template <class T> Vector<T> sub(Vector<T> a, Vector<T> b){
Vector<T> m(a.size());
for(int i=0;i<a.size();i++){m[i] = a[i] - b[i];}
return m;
}
/**
*********************************
* Matrices *
*********************************
**/
template<class T> void Fill(Matrix<T> & mat, T&val) {
int N,M;
Size(mat,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
mat[i][j] = val;
}}
}
/** Transpose of a matrix **/
Matrix<T> Transpose(Matrix<T> & mat){
int N,M;
Size(mat,N,M);
Matrix C; Resize(C,M,N);
for(int i=0;i<M;i++){
for(int j=0;j<N;j++){
C[i][j] = mat[j][i];
}}
return C;
}
/** Set Matrix to unit matrix **/
template<class T> void Unity(Matrix<T> &mat){
int N; SizeSquare(mat,N);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
if ( i==j ) A[i][j] = 1;
else A[i][j] = 0;
}
}
}
/** Add C * I to matrix **/
template<class T>
void PlusUnit(Matrix<T> & A,T c){
int dim; SizeSquare(A,dim);
for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}
}
/** return the Hermitian conjugate of matrix **/
Matrix<T> HermitianConj(Matrix<T> &mat){
int dim; SizeSquare(mat,dim);
Matrix<T> C; Resize(C,dim,dim);
for(int i=0;i<dim;i++){
for(int j=0;j<dim;j++){
C[i][j] = conj(mat[j][i]);
}
}
return C;
}
/** return diagonal entries as a Vector **/
Vector<T> diag(Matrix<T> &A)
{
int dim; SizeSquare(A,dim);
Vector<T> d; Resize(d,dim);
for(int i=0;i<dim;i++){
d[i] = A[i][i];
}
return d;
}
/** Left multiply by a Vector **/
Vector<T> operator *(Vector<T> &B,Matrix<T> &A)
{
int K,M,N;
Size(B,K);
Size(A,M,N);
assert(K==M);
Vector<T> C; Resize(C,N);
for(int j=0;j<N;j++){
T sum = 0.0;
for(int i=0;i<M;i++){
sum += B[i] * A[i][j];
}
C[j] = sum;
}
return C;
}
/** return 1/diagonal entries as a Vector **/
Vector<T> inv_diag(Matrix<T> & A){
int dim; SizeSquare(A,dim);
Vector<T> d; Resize(d,dim);
for(int i=0;i<dim;i++){
d[i] = 1.0/A[i][i];
}
return d;
}
/** Matrix Addition **/
inline Matrix<T> operator + (Matrix<T> &A,Matrix<T> &B)
{
int N,M ; SizeSame(A,B,N,M);
Matrix C; Resize(C,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
C[i][j] = A[i][j] + B[i][j];
}
}
return C;
}
/** Matrix Subtraction **/
inline Matrix<T> operator- (Matrix<T> & A,Matrix<T> &B){
int N,M ; SizeSame(A,B,N,M);
Matrix C; Resize(C,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
C[i][j] = A[i][j] - B[i][j];
}}
return C;
}
/** Matrix scalar multiplication **/
inline Matrix<T> operator* (Matrix<T> & A,T c){
int N,M; Size(A,N,M);
Matrix C; Resize(C,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
C[i][j] = A[i][j]*c;
}}
return C;
}
/** Matrix Matrix multiplication **/
inline Matrix<T> operator* (Matrix<T> &A,Matrix<T> &B){
int K,L,N,M;
Size(A,K,L);
Size(B,N,M); assert(L==N);
Matrix C; Resize(C,K,M);
for(int i=0;i<K;i++){
for(int j=0;j<M;j++){
T sum = 0.0;
for(int k=0;k<N;k++) sum += A[i][k]*B[k][j];
C[i][j] =sum;
}
}
return C;
}
/** Matrix Vector multiplication **/
inline Vector<T> operator* (Matrix<T> &A,Vector<T> &B){
int M,N,K;
Size(A,N,M);
Size(B,K); assert(K==M);
Vector<T> C; Resize(C,N);
for(int i=0;i<N;i++){
T sum = 0.0;
for(int j=0;j<M;j++) sum += A[i][j]*B[j];
C[i] = sum;
}
return C;
}
/** Some version of Matrix norm **/
/*
inline T Norm(){ // this is not a usual L2 norm
T norm = 0;
for(int i=0;i<dim;i++){
for(int j=0;j<dim;j++){
norm += abs(A[i][j]);
}}
return norm;
}
*/
/** Some version of Matrix norm **/
template<class T> T LargestDiag(Matrix<T> &A)
{
int dim ; SizeSquare(A,dim);
T ld = abs(A[0][0]);
for(int i=1;i<dim;i++){
T cf = abs(A[i][i]);
if(abs(cf) > abs(ld) ){ld = cf;}
}
return ld;
}
/** Look for entries on the leading subdiagonal that are smaller than 'small' **/
template <class T,class U> int Chop_subdiag(Matrix<T> &A,T norm, int offset, U small)
{
int dim; SizeSquare(A,dim);
for(int l = dim - 1 - offset; l >= 1; l--) {
if((U)abs(A[l][l - 1]) < (U)small) {
A[l][l-1]=(U)0.0;
return l;
}
}
return 0;
}
/** Look for entries on the leading subdiagonal that are smaller than 'small' **/
template <class T,class U> int Chop_symm_subdiag(Matrix<T> & A,T norm, int offset, U small)
{
int dim; SizeSquare(A,dim);
for(int l = dim - 1 - offset; l >= 1; l--) {
if((U)abs(A[l][l - 1]) < (U)small) {
A[l][l - 1] = (U)0.0;
A[l - 1][l] = (U)0.0;
return l;
}
}
return 0;
}
/**Assign a submatrix to a larger one**/
template<class T>
void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S)
{
for(int i = row_st; i<row_end; i++){
for(int j = col_st; j<col_end; j++){
A[i][j] = S[i - row_st][j - col_st];
}
}
}
/**Get a square submatrix**/
template <class T>
Matrix<T> GetSubMtx(Matrix<T> &A,int row_st, int row_end, int col_st, int col_end)
{
Matrix<T> H; Resize(row_end - row_st,col_end-col_st);
for(int i = row_st; i<row_end; i++){
for(int j = col_st; j<col_end; j++){
H[i-row_st][j-col_st]=A[i][j];
}}
return H;
}
/**Assign a submatrix to a larger one NB remember Vector Vectors are transposes of the matricies they represent**/
template<class T>
void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S)
{
for(int i = row_st; i<row_end; i++){
for(int j = col_st; j<col_end; j++){
A[i][j] = S[i - row_st][j - col_st];
}}
}
/** compute b_i A_ij b_j **/ // surprised no Conj
template<class T> T proj(Matrix<T> A, Vector<T> B){
int dim; SizeSquare(A,dim);
int dimB; Size(B,dimB);
assert(dimB==dim);
T C = 0;
for(int i=0;i<dim;i++){
T sum = 0.0;
for(int j=0;j<dim;j++){
sum += A[i][j]*B[j];
}
C += B[i]*sum; // No conj?
}
return C;
}
/*
*************************************************************
*
* Matrix Vector products
*
*************************************************************
*/
// Instead make a linop and call my CG;
/// q -> q Q
template <class T,class Fermion> void times(Vector<Fermion> &q, Matrix<T> &Q)
{
int M; SizeSquare(Q,M);
int N; Size(q,N);
assert(M==N);
times(q,Q,N);
}
/// q -> q Q
template <class T> void times(multi1d<LatticeFermion> &q, Matrix<T> &Q, int N)
{
GridBase *grid = q[0]._grid;
int M; SizeSquare(Q,M);
int K; Size(q,K);
assert(N<M);
assert(N<K);
Vector<Fermion> S(N,grid );
for(int j=0;j<N;j++){
S[j] = zero;
for(int k=0;k<N;k++){
S[j] = S[j] + q[k]* Q[k][j];
}
}
for(int j=0;j<q.size();j++){
q[j] = S[j];
}
}
#endif

View File

@ -1,75 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/MatrixUtils.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MATRIX_UTILS_H
#define GRID_MATRIX_UTILS_H
namespace Grid {
namespace MatrixUtils {
template<class T> inline void Size(Matrix<T>& A,int &N,int &M){
N=A.size(); assert(N>0);
M=A[0].size();
for(int i=0;i<N;i++){
assert(A[i].size()==M);
}
}
template<class T> inline void SizeSquare(Matrix<T>& A,int &N)
{
int M;
Size(A,N,M);
assert(N==M);
}
template<class T> inline void Fill(Matrix<T>& A,T & val)
{
int N,M;
Size(A,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
A[i][j]=val;
}}
}
template<class T> inline void Diagonal(Matrix<T>& A,T & val)
{
int N;
SizeSquare(A,N);
for(int i=0;i<N;i++){
A[i][i]=val;
}
}
template<class T> inline void Identity(Matrix<T>& A)
{
Fill(A,0.0);
Diagonal(A,1.0);
}
};
}
#endif

View File

@ -1,15 +0,0 @@
- ConjugateGradientMultiShift
- MCR
- Potentially Useful Boost libraries
- MultiArray
- Aligned allocator; memory pool
- Remez -- Mike or Boost?
- Multiprecision
- quaternians
- Tokenize
- Serialization
- Regex
- Proto (ET)
- uBlas

View File

@ -1,122 +0,0 @@
#include <math.h>
#include <stdlib.h>
#include <vector>
struct Bisection {
static void get_eig2(int row_num,std::vector<RealD> &ALPHA,std::vector<RealD> &BETA, std::vector<RealD> & eig)
{
int i,j;
std::vector<RealD> evec1(row_num+3);
std::vector<RealD> evec2(row_num+3);
RealD eps2;
ALPHA[1]=0.;
BETHA[1]=0.;
for(i=0;i<row_num-1;i++) {
ALPHA[i+1] = A[i*(row_num+1)].real();
BETHA[i+2] = A[i*(row_num+1)+1].real();
}
ALPHA[row_num] = A[(row_num-1)*(row_num+1)].real();
bisec(ALPHA,BETHA,row_num,1,row_num,1e-10,1e-10,evec1,eps2);
bisec(ALPHA,BETHA,row_num,1,row_num,1e-16,1e-16,evec2,eps2);
// Do we really need to sort here?
int begin=1;
int end = row_num;
int swapped=1;
while(swapped) {
swapped=0;
for(i=begin;i<end;i++){
if(mag(evec2[i])>mag(evec2[i+1])) {
swap(evec2+i,evec2+i+1);
swapped=1;
}
}
end--;
for(i=end-1;i>=begin;i--){
if(mag(evec2[i])>mag(evec2[i+1])) {
swap(evec2+i,evec2+i+1);
swapped=1;
}
}
begin++;
}
for(i=0;i<row_num;i++){
for(j=0;j<row_num;j++) {
if(i==j) H[i*row_num+j]=evec2[i+1];
else H[i*row_num+j]=0.;
}
}
}
static void bisec(std::vector<RealD> &c,
std::vector<RealD> &b,
int n,
int m1,
int m2,
RealD eps1,
RealD relfeh,
std::vector<RealD> &x,
RealD &eps2)
{
std::vector<RealD> wu(n+2);
RealD h,q,x1,xu,x0,xmin,xmax;
int i,a,k;
b[1]=0.0;
xmin=c[n]-fabs(b[n]);
xmax=c[n]+fabs(b[n]);
for(i=1;i<n;i++){
h=fabs(b[i])+fabs(b[i+1]);
if(c[i]+h>xmax) xmax= c[i]+h;
if(c[i]-h<xmin) xmin= c[i]-h;
}
xmax *=2.;
eps2=relfeh*((xmin+xmax)>0.0 ? xmax : -xmin);
if(eps1<=0.0) eps1=eps2;
eps2=0.5*eps1+7.0*(eps2);
x0=xmax;
for(i=m1;i<=m2;i++){
x[i]=xmax;
wu[i]=xmin;
}
for(k=m2;k>=m1;k--){
xu=xmin;
i=k;
do{
if(xu<wu[i]){
xu=wu[i];
i=m1-1;
}
i--;
}while(i>=m1);
if(x0>x[k]) x0=x[k];
while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){
x1=(xu+x0)/2;
a=0;
q=1.0;
for(i=1;i<=n;i++){
q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q:fabs(b[i])/relfeh);
if(q<0) a++;
}
// printf("x1=%e a=%d\n",x1,a);
if(a<k){
if(a<m1){
xu=x1;
wu[m1]=x1;
}else {
xu=x1;
wu[a+1]=x1;
if(x[a]>x1) x[a]=x1;
}
}else x0=x1;
}
x[k]=(x0+xu)/2;
}
}
}

View File

@ -1 +0,0 @@

View File

@ -6,8 +6,9 @@
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -99,7 +100,7 @@ public:
virtual int oIndex(std::vector<int> &coor)
{
int idx=0;
// Works with either global or local coordinates
// Works with either global or local coordinates
for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
return idx;
}
@ -121,6 +122,12 @@ public:
Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions);
}
inline void InOutCoorToLocalCoor (std::vector<int> &ocoor, std::vector<int> &icoor, std::vector<int> &lcoor) {
lcoor.resize(_ndimension);
for (int d = 0; d < _ndimension; d++)
lcoor[d] = ocoor[d] + _rdimensions[d] * icoor[d];
}
//////////////////////////////////////////////////////////
// SIMD lane addressing
//////////////////////////////////////////////////////////
@ -128,6 +135,7 @@ public:
{
Lexicographic::CoorFromIndex(coor,lane,_simd_layout);
}
inline int PermuteDim(int dimension){
return _simd_layout[dimension]>1;
}
@ -145,15 +153,15 @@ public:
// Distance should be either 0,1,2..
//
if ( _simd_layout[dimension] > 2 ) {
for(int d=0;d<_ndimension;d++){
if ( d != dimension ) assert ( (_simd_layout[d]==1) );
}
permute_type = RotateBit; // How to specify distance; this is not just direction.
return permute_type;
for(int d=0;d<_ndimension;d++){
if ( d != dimension ) assert ( (_simd_layout[d]==1) );
}
permute_type = RotateBit; // How to specify distance; this is not just direction.
return permute_type;
}
for(int d=_ndimension-1;d>dimension;d--){
if (_simd_layout[d]>1 ) permute_type++;
if (_simd_layout[d]>1 ) permute_type++;
}
return permute_type;
}
@ -173,6 +181,24 @@ public:
inline const std::vector<int> &LocalDimensions(void) { return _ldimensions;};
inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;};
////////////////////////////////////////////////////////////////
// Utility to print the full decomposition details
////////////////////////////////////////////////////////////////
void show_decomposition(){
std::cout << GridLogMessage << "Full Dimensions : " << _fdimensions << std::endl;
std::cout << GridLogMessage << "Global Dimensions : " << _gdimensions << std::endl;
std::cout << GridLogMessage << "Local Dimensions : " << _ldimensions << std::endl;
std::cout << GridLogMessage << "Reduced Dimensions : " << _rdimensions << std::endl;
std::cout << GridLogMessage << "Outer strides : " << _ostride << std::endl;
std::cout << GridLogMessage << "Inner strides : " << _istride << std::endl;
std::cout << GridLogMessage << "iSites : " << _isites << std::endl;
std::cout << GridLogMessage << "oSites : " << _osites << std::endl;
std::cout << GridLogMessage << "lSites : " << lSites() << std::endl;
std::cout << GridLogMessage << "gSites : " << gSites() << std::endl;
std::cout << GridLogMessage << "Nd : " << _ndimension << std::endl;
}
////////////////////////////////////////////////////////////////
// Global addressing
////////////////////////////////////////////////////////////////
@ -184,12 +210,15 @@ public:
assert(lidx<lSites());
Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
}
void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){
gidx=0;
int mult=1;
for(int mu=0;mu<_ndimension;mu++) {
gidx+=mult*gcoor[mu];
mult*=_gdimensions[mu];
gidx+=mult*gcoor[mu];
mult*=_gdimensions[mu];
}
}
void GlobalCoorToProcessorCoorLocalCoor(std::vector<int> &pcoor,std::vector<int> &lcoor,const std::vector<int> &gcoor)
@ -197,9 +226,9 @@ public:
pcoor.resize(_ndimension);
lcoor.resize(_ndimension);
for(int mu=0;mu<_ndimension;mu++){
int _fld = _fdimensions[mu]/_processors[mu];
pcoor[mu] = gcoor[mu]/_fld;
lcoor[mu] = gcoor[mu]%_fld;
int _fld = _fdimensions[mu]/_processors[mu];
pcoor[mu] = gcoor[mu]/_fld;
lcoor[mu] = gcoor[mu]%_fld;
}
}
void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector<int> &gcoor)
@ -211,9 +240,9 @@ public:
/*
std::vector<int> cblcoor(lcoor);
for(int d=0;d<cblcoor.size();d++){
if( this->CheckerBoarded(d) ) {
cblcoor[d] = lcoor[d]/2;
}
if( this->CheckerBoarded(d) ) {
cblcoor[d] = lcoor[d]/2;
}
}
*/
i_idx= iIndex(lcoor);
@ -239,7 +268,7 @@ public:
{
RankIndexToGlobalCoor(rank,o_idx,i_idx ,fcoor);
if(CheckerBoarded(0)){
fcoor[0] = fcoor[0]*2+cb;
fcoor[0] = fcoor[0]*2+cb;
}
}
void ProcessorCoorLocalCoorToGlobalCoor(std::vector<int> &Pcoor,std::vector<int> &Lcoor,std::vector<int> &gcoor)

View File

@ -206,7 +206,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",GroupRank,r);
shm_unlink(shm_name);
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0660);
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
if ( fd < 0 ) { perror("failed shm_open"); assert(0); }
ftruncate(fd, size);
@ -226,7 +226,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",GroupRank,r);
int fd=shm_open(shm_name,O_RDWR,0660);
int fd=shm_open(shm_name,O_RDWR,0666);
if ( fd<0 ) { perror("failed shm_open"); assert(0); }
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

View File

@ -30,21 +30,11 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
namespace Grid {
template<class vobj>
class SimpleCompressor {
public:
void Point(int) {};
vobj operator() (const vobj &arg) {
return arg;
}
};
///////////////////////////////////////////////////////////////////
// Gather for when there is no need to SIMD split with compression
// Gather for when there is no need to SIMD split
///////////////////////////////////////////////////////////////////
template<class vobj,class cobj,class compressor> void
Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0)
template<class vobj> void
Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimension,int plane,int cbmask, int off=0)
{
int rd = rhs._grid->_rdimensions[dimension];
@ -62,7 +52,7 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimen
for(int b=0;b<e2;b++){
int o = n*stride;
int bo = n*e2;
buffer[off+bo+b]=compress(rhs._odata[so+o+b]);
buffer[off+bo+b]=rhs._odata[so+o+b];
}
}
} else {
@ -78,17 +68,16 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimen
}
}
parallel_for(int i=0;i<table.size();i++){
buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]);
buffer[off+table[i].first]=rhs._odata[so+table[i].second];
}
}
}
///////////////////////////////////////////////////////////////////
// Gather for when there *is* need to SIMD split with compression
// Gather for when there *is* need to SIMD split
///////////////////////////////////////////////////////////////////
template<class cobj,class vobj,class compressor> void
Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_object *> pointers,int dimension,int plane,int cbmask,compressor &compress)
template<class vobj> void
Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename vobj::scalar_object *> pointers,int dimension,int plane,int cbmask)
{
int rd = rhs._grid->_rdimensions[dimension];
@ -109,8 +98,8 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
int o = n*n1;
int offset = b+n*e2;
cobj temp =compress(rhs._odata[so+o+b]);
extract<cobj>(temp,pointers,offset);
vobj temp =rhs._odata[so+o+b];
extract<vobj>(temp,pointers,offset);
}
}
@ -127,32 +116,14 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
int offset = b+n*e2;
if ( ocb & cbmask ) {
cobj temp =compress(rhs._odata[so+o+b]);
extract<cobj>(temp,pointers,offset);
vobj temp =rhs._odata[so+o+b];
extract<vobj>(temp,pointers,offset);
}
}
}
}
}
//////////////////////////////////////////////////////
// Gather for when there is no need to SIMD split
//////////////////////////////////////////////////////
template<class vobj> void Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask)
{
SimpleCompressor<vobj> dontcompress;
Gather_plane_simple (rhs,buffer,dimension,plane,cbmask,dontcompress);
}
//////////////////////////////////////////////////////
// Gather for when there *is* need to SIMD split
//////////////////////////////////////////////////////
template<class vobj> void Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename vobj::scalar_object *> pointers,int dimension,int plane,int cbmask)
{
SimpleCompressor<vobj> dontcompress;
Gather_plane_extract<vobj,vobj,decltype(dontcompress)>(rhs,pointers,dimension,plane,cbmask,dontcompress);
}
//////////////////////////////////////////////////////
// Scatter for when there is no need to SIMD split
//////////////////////////////////////////////////////
@ -200,7 +171,7 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
//////////////////////////////////////////////////////
// Scatter for when there *is* need to SIMD split
//////////////////////////////////////////////////////
template<class vobj,class cobj> void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<cobj *> pointers,int dimension,int plane,int cbmask)
template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<typename vobj::scalar_object *> pointers,int dimension,int plane,int cbmask)
{
int rd = rhs._grid->_rdimensions[dimension];

View File

@ -154,13 +154,7 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
recv_from_rank,
bytes);
grid->Barrier();
/*
for(int i=0;i<send_buf.size();i++){
assert(recv_buf.size()==buffer_size);
assert(send_buf.size()==buffer_size);
std::cout << "SendRecv_Cshift_comms ["<<i<<" "<< dimension<<"] snd "<<send_buf[i]<<" rcv " << recv_buf[i] << " 0x" << cbmask<<std::endl;
}
*/
Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask);
}
}
@ -246,13 +240,6 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
(void *)&recv_buf_extract[i][0],
recv_from_rank,
bytes);
/*
for(int w=0;w<recv_buf_extract[i].size();w++){
assert(recv_buf_extract[i].size()==buffer_size);
assert(send_buf_extract[i].size()==buffer_size);
std::cout << "SendRecv_Cshift_comms ["<<w<<" "<< dimension<<"] recv "<<recv_buf_extract[i][w]<<" send " << send_buf_extract[nbr_lane][w] << cbmask<<std::endl;
}
*/
grid->Barrier();
rpointers[i] = &recv_buf_extract[i][0];
} else {

12276
lib/json/json.hpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -235,64 +235,74 @@ public:
}
};
//////////////////////////////////////////////////////////////////
// Constructor requires "grid" passed.
// what about a default grid?
//////////////////////////////////////////////////////////////////
Lattice(GridBase *grid) : _odata(grid->oSites()) {
_grid = grid;
//////////////////////////////////////////////////////////////////
// Constructor requires "grid" passed.
// what about a default grid?
//////////////////////////////////////////////////////////////////
Lattice(GridBase *grid) : _odata(grid->oSites()) {
_grid = grid;
// _odata.reserve(_grid->oSites());
// _odata.resize(_grid->oSites());
// std::cout << "Constructing lattice object with Grid pointer "<<_grid<<std::endl;
assert((((uint64_t)&_odata[0])&0xF) ==0);
checkerboard=0;
}
Lattice(const Lattice& r){ // copy constructor
_grid = r._grid;
checkerboard = r.checkerboard;
_odata.resize(_grid->oSites());// essential
parallel_for(int ss=0;ss<_grid->oSites();ss++){
_odata[ss]=r._odata[ss];
}
}
virtual ~Lattice(void) = default;
assert((((uint64_t)&_odata[0])&0xF) ==0);
checkerboard=0;
}
Lattice(const Lattice& r){ // copy constructor
_grid = r._grid;
checkerboard = r.checkerboard;
_odata.resize(_grid->oSites());// essential
parallel_for(int ss=0;ss<_grid->oSites();ss++){
_odata[ss]=r._odata[ss];
}
}
virtual ~Lattice(void) = default;
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r;
}
return *this;
}
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
this->checkerboard = r.checkerboard;
conformable(*this,r);
parallel_for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r._odata[ss];
}
return *this;
void reset(GridBase* grid) {
if (_grid != grid) {
_grid = grid;
_odata.resize(grid->oSites());
checkerboard = 0;
}
}
// *=,+=,-= operators inherit behvour from correspond */+/- operation
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
*this = (*this)*r;
return *this;
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
parallel_for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r;
}
template<class T> strong_inline Lattice<vobj> &operator -=(const T &r) {
*this = (*this)-r;
return *this;
return *this;
}
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
this->checkerboard = r.checkerboard;
conformable(*this,r);
parallel_for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r._odata[ss];
}
template<class T> strong_inline Lattice<vobj> &operator +=(const T &r) {
*this = (*this)+r;
return *this;
}
}; // class Lattice
return *this;
}
// *=,+=,-= operators inherit behvour from correspond */+/- operation
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
*this = (*this)*r;
return *this;
}
template<class T> strong_inline Lattice<vobj> &operator -=(const T &r) {
*this = (*this)-r;
return *this;
}
template<class T> strong_inline Lattice<vobj> &operator +=(const T &r) {
*this = (*this)+r;
return *this;
}
}; // class Lattice
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
std::vector<int> gcoor;
typedef typename vobj::scalar_object sobj;
@ -310,7 +320,7 @@ public:
}
return stream;
}
}

View File

@ -1,157 +1,154 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/lattice/Lattice_reduction.h
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_LATTICE_REDUCTION_H
#define GRID_LATTICE_REDUCTION_H
#include <Grid/Grid_Eigen_Dense.h>
namespace Grid {
#ifdef GRID_WARN_SUBOPTIMAL
#warning "Optimisation alert all these reduction loops are NOT threaded "
#endif
////////////////////////////////////////////////////////////////////////////////////////////////////
// Deterministic Reduction operations
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
ComplexD nrm = innerProduct(arg,arg);
return std::real(nrm);
////////////////////////////////////////////////////////////////////////////////////////////////////
// Deterministic Reduction operations
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
ComplexD nrm = innerProduct(arg,arg);
return std::real(nrm);
}
// Double inner product
template<class vobj>
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
{
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_typeD vector_type;
scalar_type nrm;
GridBase *grid = left._grid;
std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int nwork, mywork, myoff;
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
decltype(innerProductD(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
for(int ss=myoff;ss<mywork+myoff; ss++){
vnrm = vnrm + innerProductD(left._odata[ss],right._odata[ss]);
}
sumarray[thr]=TensorRemove(vnrm) ;
}
vector_type vvnrm; vvnrm=zero; // sum across threads
for(int i=0;i<grid->SumArraySize();i++){
vvnrm = vvnrm+sumarray[i];
}
nrm = Reduce(vvnrm);// sum across simd
right._grid->GlobalSum(nrm);
return nrm;
}
template<class Op,class T1>
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
{
return sum(closure(expr));
}
template<class vobj>
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
{
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
scalar_type nrm;
GridBase *grid = left._grid;
std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
for(int i=0;i<grid->SumArraySize();i++){
sumarray[i]=zero;
}
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int nwork, mywork, myoff;
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
for(int ss=myoff;ss<mywork+myoff; ss++){
vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
}
sumarray[thr]=TensorRemove(vnrm) ;
}
vector_type vvnrm; vvnrm=zero; // sum across threads
for(int i=0;i<grid->SumArraySize();i++){
vvnrm = vvnrm+sumarray[i];
}
nrm = Reduce(vvnrm);// sum across simd
right._grid->GlobalSum(nrm);
return nrm;
}
template<class Op,class T1>
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
{
return sum(closure(expr));
}
template<class Op,class T1,class T2>
inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
template<class Op,class T1,class T2>
inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object
{
return sum(closure(expr));
}
template<class Op,class T1,class T2,class T3>
inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
eval(0,std::get<1>(expr.second)),
eval(0,std::get<2>(expr.second))
))::scalar_object
{
return sum(closure(expr));
}
template<class vobj>
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg){
GridBase *grid=arg._grid;
int Nsimd = grid->Nsimd();
std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize());
for(int i=0;i<grid->SumArraySize();i++){
sumarray[i]=zero;
}
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int nwork, mywork, myoff;
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
vobj vvsum=zero;
for(int ss=myoff;ss<mywork+myoff; ss++){
vvsum = vvsum + arg._odata[ss];
}
sumarray[thr]=vvsum;
}
vobj vsum=zero; // sum across threads
for(int i=0;i<grid->SumArraySize();i++){
vsum = vsum+sumarray[i];
}
typedef typename vobj::scalar_object sobj;
sobj ssum=zero;
std::vector<sobj> buf(Nsimd);
extract(vsum,buf);
for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
arg._grid->GlobalSum(ssum);
return ssum;
{
return sum(closure(expr));
}
template<class Op,class T1,class T2,class T3>
inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
eval(0,std::get<1>(expr.second)),
eval(0,std::get<2>(expr.second))
))::scalar_object
{
return sum(closure(expr));
}
template<class vobj>
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
{
GridBase *grid=arg._grid;
int Nsimd = grid->Nsimd();
std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize());
for(int i=0;i<grid->SumArraySize();i++){
sumarray[i]=zero;
}
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int nwork, mywork, myoff;
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
vobj vvsum=zero;
for(int ss=myoff;ss<mywork+myoff; ss++){
vvsum = vvsum + arg._odata[ss];
}
sumarray[thr]=vvsum;
}
vobj vsum=zero; // sum across threads
for(int i=0;i<grid->SumArraySize();i++){
vsum = vsum+sumarray[i];
}
typedef typename vobj::scalar_object sobj;
sobj ssum=zero;
std::vector<sobj> buf(Nsimd);
extract(vsum,buf);
for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
arg._grid->GlobalSum(ssum);
return ssum;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// sliceSum, sliceInnerProduct, sliceAxpy, sliceNorm etc...
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
{
///////////////////////////////////////////////////////
// FIXME precision promoted summation
// may be important for correlation functions
// But easily avoided by using double precision fields
///////////////////////////////////////////////////////
typedef typename vobj::scalar_object sobj;
GridBase *grid = Data._grid;
assert(grid!=NULL);
// FIXME
// std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;
const int Nd = grid->_ndimension;
const int Nsimd = grid->Nsimd();
@ -163,23 +160,31 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
int rd=grid->_rdimensions[orthogdim];
std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
std::vector<sobj> lsSum(ld,zero); // sum across these down to scalars
std::vector<sobj> extracted(Nsimd); // splitting the SIMD
std::vector<sobj> lsSum(ld,zero); // sum across these down to scalars
std::vector<sobj> extracted(Nsimd); // splitting the SIMD
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
result.resize(fd); // And then global sum to return the same vector to every node
for(int r=0;r<rd;r++){
lvSum[r]=zero;
}
std::vector<int> coor(Nd);
int e1= grid->_slice_nblock[orthogdim];
int e2= grid->_slice_block [orthogdim];
int stride=grid->_slice_stride[orthogdim];
// sum over reduced dimension planes, breaking out orthog dir
// Parallel over orthog direction
parallel_for(int r=0;r<rd;r++){
for(int ss=0;ss<grid->oSites();ss++){
Lexicographic::CoorFromIndex(coor,ss,grid->_rdimensions);
int r = coor[orthogdim];
lvSum[r]=lvSum[r]+Data._odata[ss];
}
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int ss= so+n*stride+b;
lvSum[r]=lvSum[r]+Data._odata[ss];
}
}
}
// Sum across simd lanes in the plane, breaking out orthog dir.
std::vector<int> icoor(Nd);
@ -214,10 +219,303 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
result[t]=gsum;
}
}
template<class vobj>
static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
{
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
GridBase *grid = lhs._grid;
assert(grid!=NULL);
conformable(grid,rhs._grid);
const int Nd = grid->_ndimension;
const int Nsimd = grid->Nsimd();
assert(orthogdim >= 0);
assert(orthogdim < Nd);
int fd=grid->_fdimensions[orthogdim];
int ld=grid->_ldimensions[orthogdim];
int rd=grid->_rdimensions[orthogdim];
std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first
std::vector<scalar_type > lsSum(ld,scalar_type(0.0)); // sum across these down to scalars
std::vector<iScalar<scalar_type> > extracted(Nsimd); // splitting the SIMD
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
for(int r=0;r<rd;r++){
lvSum[r]=zero;
}
int e1= grid->_slice_nblock[orthogdim];
int e2= grid->_slice_block [orthogdim];
int stride=grid->_slice_stride[orthogdim];
parallel_for(int r=0;r<rd;r++){
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int ss= so+n*stride+b;
vector_type vv = TensorRemove(innerProduct(lhs._odata[ss],rhs._odata[ss]));
lvSum[r]=lvSum[r]+vv;
}
}
}
// Sum across simd lanes in the plane, breaking out orthog dir.
std::vector<int> icoor(Nd);
for(int rt=0;rt<rd;rt++){
iScalar<vector_type> temp;
temp._internal = lvSum[rt];
extract(temp,extracted);
for(int idx=0;idx<Nsimd;idx++){
grid->iCoorFromIindex(icoor,idx);
int ldx =rt+icoor[orthogdim]*rd;
lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
}
}
// sum over nodes.
scalar_type gsum;
for(int t=0;t<fd;t++){
int pt = t/ld; // processor plane
int lt = t%ld;
if ( pt == grid->_processor_coor[orthogdim] ) {
gsum=lsSum[lt];
} else {
gsum=scalar_type(0.0);
}
grid->GlobalSum(gsum);
result[t]=gsum;
}
}
template<class vobj>
static void sliceNorm (std::vector<RealD> &sn,const Lattice<vobj> &rhs,int Orthog)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
int Nblock = rhs._grid->GlobalDimensions()[Orthog];
std::vector<ComplexD> ip(Nblock);
sn.resize(Nblock);
sliceInnerProductVector(ip,rhs,rhs,Orthog);
for(int ss=0;ss<Nblock;ss++){
sn[ss] = real(ip[ss]);
}
};
template<class vobj>
static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y,
int orthogdim,RealD scale=1.0)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
typedef typename vobj::tensor_reduced tensor_reduced;
GridBase *grid = X._grid;
int Nsimd =grid->Nsimd();
int Nblock =grid->GlobalDimensions()[orthogdim];
int fd =grid->_fdimensions[orthogdim];
int ld =grid->_ldimensions[orthogdim];
int rd =grid->_rdimensions[orthogdim];
int e1 =grid->_slice_nblock[orthogdim];
int e2 =grid->_slice_block [orthogdim];
int stride =grid->_slice_stride[orthogdim];
std::vector<int> icoor;
for(int r=0;r<rd;r++){
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
vector_type av;
for(int l=0;l<Nsimd;l++){
grid->iCoorFromIindex(icoor,l);
int ldx =r+icoor[orthogdim]*rd;
scalar_type *as =(scalar_type *)&av;
as[l] = scalar_type(a[ldx])*scale;
}
tensor_reduced at; at=av;
parallel_for_nest2(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int ss= so+n*stride+b;
R._odata[ss] = at*X._odata[ss]+Y._odata[ss];
}
}
}
};
/*
template<class vobj>
static void sliceMaddVectorSlow (Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y,
int Orthog,RealD scale=1.0)
{
// FIXME: Implementation is slow
// Best base the linear combination by constructing a
// set of vectors of size grid->_rdimensions[Orthog].
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
int Nblock = X._grid->GlobalDimensions()[Orthog];
GridBase *FullGrid = X._grid;
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
Lattice<vobj> Xslice(SliceGrid);
Lattice<vobj> Rslice(SliceGrid);
// If we based this on Cshift it would work for spread out
// but it would be even slower
for(int i=0;i<Nblock;i++){
ExtractSlice(Rslice,Y,i,Orthog);
ExtractSlice(Xslice,X,i,Orthog);
Rslice = Rslice + Xslice*(scale*a[i]);
InsertSlice(Rslice,R,i,Orthog);
}
};
template<class vobj>
static void sliceInnerProductVectorSlow( std::vector<ComplexD> & vec, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
{
// FIXME: Implementation is slow
// Look at localInnerProduct implementation,
// and do inside a site loop with block strided iterators
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
typedef typename vobj::tensor_reduced scalar;
typedef typename scalar::scalar_object scomplex;
int Nblock = lhs._grid->GlobalDimensions()[Orthog];
vec.resize(Nblock);
std::vector<scomplex> sip(Nblock);
Lattice<scalar> IP(lhs._grid);
IP=localInnerProduct(lhs,rhs);
sliceSum(IP,sip,Orthog);
for(int ss=0;ss<Nblock;ss++){
vec[ss] = TensorRemove(sip[ss]);
}
}
*/
//////////////////////////////////////////////////////////////////////////////////////////
// FIXME: Implementation is slow
// If we based this on Cshift it would work for spread out
// but it would be even slower
//
// Repeated extract slice is inefficient
//
// Best base the linear combination by constructing a
// set of vectors of size grid->_rdimensions[Orthog].
//////////////////////////////////////////////////////////////////////////////////////////
inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog)
{
int NN = BlockSolverGrid->_ndimension;
int nsimd = BlockSolverGrid->Nsimd();
std::vector<int> latt_phys(0);
std::vector<int> simd_phys(0);
std::vector<int> mpi_phys(0);
for(int d=0;d<NN;d++){
if( d!=Orthog ) {
latt_phys.push_back(BlockSolverGrid->_fdimensions[d]);
simd_phys.push_back(BlockSolverGrid->_simd_layout[d]);
mpi_phys.push_back(BlockSolverGrid->_processors[d]);
}
}
return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);
}
template<class vobj>
static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
int Nblock = X._grid->GlobalDimensions()[Orthog];
GridBase *FullGrid = X._grid;
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
Lattice<vobj> Xslice(SliceGrid);
Lattice<vobj> Rslice(SliceGrid);
for(int i=0;i<Nblock;i++){
ExtractSlice(Rslice,Y,i,Orthog);
for(int j=0;j<Nblock;j++){
ExtractSlice(Xslice,X,j,Orthog);
Rslice = Rslice + Xslice*(scale*aa(j,i));
}
InsertSlice(Rslice,R,i,Orthog);
}
};
template<class vobj>
static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
{
// FIXME: Implementation is slow
// Not sure of best solution.. think about it
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
GridBase *FullGrid = lhs._grid;
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
int Nblock = FullGrid->GlobalDimensions()[Orthog];
Lattice<vobj> Lslice(SliceGrid);
Lattice<vobj> Rslice(SliceGrid);
mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
for(int i=0;i<Nblock;i++){
ExtractSlice(Lslice,lhs,i,Orthog);
for(int j=0;j<Nblock;j++){
ExtractSlice(Rslice,rhs,j,Orthog);
mat(i,j) = innerProduct(Lslice,Rslice);
}
}
#undef FORCE_DIAG
#ifdef FORCE_DIAG
for(int i=0;i<Nblock;i++){
for(int j=0;j<Nblock;j++){
if ( i != j ) mat(i,j)=0.0;
}
}
#endif
return;
}
} /*END NAMESPACE GRID*/
#endif

View File

@ -6,8 +6,8 @@
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -75,6 +75,55 @@ namespace Grid {
return multiplicity;
}
// merge of April 11 2017
//<<<<<<< HEAD
// this function is necessary for the LS vectorised field
inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
{
int rngdims = coarse->_ndimension;
// trivially extended in higher dims, with locality guaranteeing RNG state is local to node
int lowerdims = fine->_ndimension - coarse->_ndimension; assert(lowerdims >= 0);
// assumes that the higher dimensions are not using more processors
// all further divisions are local
for(int d=0;d<lowerdims;d++) assert(fine->_processors[d]==1);
for(int d=0;d<rngdims;d++) assert(coarse->_processors[d] == fine->_processors[d+lowerdims]);
// then divide the number of local sites
// check that the total number of sims agree, meanse the iSites are the same
assert(fine->Nsimd() == coarse->Nsimd());
// check that the two grids divide cleanly
assert( (fine->lSites() / coarse->lSites() ) * coarse->lSites() == fine->lSites() );
return fine->lSites() / coarse->lSites();
}
/*
// Wrap seed_seq to give common interface with random_device
class fixedSeed {
public:
typedef std::seed_seq::result_type result_type;
std::seed_seq src;
fixedSeed(const std::vector<int> &seeds) : src(seeds.begin(),seeds.end()) {};
result_type operator () (void){
std::vector<result_type> list(1);
src.generate(list.begin(),list.end());
return list[0];
}
};
=======
>>>>>>> develop
*/
// real scalars are one component
template<class scalar,class distribution,class generator>
void fillScalar(scalar &s,distribution &dist,generator & gen)
@ -109,7 +158,7 @@ namespace Grid {
#ifdef RNG_SITMO
typedef sitmo::prng_engine RngEngine;
typedef uint64_t RngStateType;
static const int RngStateCount = 4;
static const int RngStateCount = 13;
#endif
std::vector<RngEngine> _generators;
@ -164,7 +213,7 @@ namespace Grid {
ss<<eng;
ss.seekg(0,ss.beg);
for(int i=0;i<RngStateCount;i++){
ss>>saved[i];
ss>>saved[i];
}
}
void GetState(std::vector<RngStateType> & saved,int gen) {
@ -174,7 +223,7 @@ namespace Grid {
assert(saved.size()==RngStateCount);
std::stringstream ss;
for(int i=0;i<RngStateCount;i++){
ss<< saved[i]<<" ";
ss<< saved[i]<<" ";
}
ss.seekg(0,ss.beg);
ss>>eng;
@ -215,7 +264,7 @@ namespace Grid {
dist[0].reset();
for(int idx=0;idx<words;idx++){
fillScalar(buf[idx],dist[0],_generators[0]);
fillScalar(buf[idx],dist[0],_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
@ -247,7 +296,7 @@ namespace Grid {
RealF *pointer=(RealF *)&l;
dist[0].reset();
for(int i=0;i<2*vComplexF::Nsimd();i++){
fillScalar(pointer[i],dist[0],_generators[0]);
fillScalar(pointer[i],dist[0],_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
@ -255,7 +304,7 @@ namespace Grid {
RealD *pointer=(RealD *)&l;
dist[0].reset();
for(int i=0;i<2*vComplexD::Nsimd();i++){
fillScalar(pointer[i],dist[0],_generators[0]);
fillScalar(pointer[i],dist[0],_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
@ -263,7 +312,7 @@ namespace Grid {
RealF *pointer=(RealF *)&l;
dist[0].reset();
for(int i=0;i<vRealF::Nsimd();i++){
fillScalar(pointer[i],dist[0],_generators[0]);
fillScalar(pointer[i],dist[0],_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
@ -275,7 +324,7 @@ namespace Grid {
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
void SeedFixedIntegers(const std::vector<int> &seeds){
CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size());
std::seed_seq src(seeds.begin(),seeds.end());
@ -284,18 +333,20 @@ namespace Grid {
};
class GridParallelRNG : public GridRNGbase {
double _time_counter;
public:
GridBase *_grid;
int _vol;
public:
unsigned int _vol;
int generator_idx(int os,int is){
int generator_idx(int os,int is) {
return is*_grid->oSites()+os;
}
GridParallelRNG(GridBase *grid) : GridRNGbase() {
_grid=grid;
_vol =_grid->iSites()*_grid->oSites();
_grid = grid;
_vol =_grid->iSites()*_grid->oSites();
_generators.resize(_vol);
_uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1});
@ -309,33 +360,34 @@ namespace Grid {
typedef typename vobj::scalar_object scalar_object;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
int multiplicity = RNGfillable(_grid,l._grid);
int Nsimd =_grid->Nsimd();
int osites=_grid->oSites();
int words=sizeof(scalar_object)/sizeof(scalar_type);
double inner_time_counter = usecond();
int multiplicity = RNGfillable_general(_grid, l._grid); // l has finer or same grid
int Nsimd = _grid->Nsimd(); // guaranteed to be the same for l._grid too
int osites = _grid->oSites(); // guaranteed to be <= l._grid->oSites() by a factor multiplicity
int words = sizeof(scalar_object) / sizeof(scalar_type);
parallel_for(int ss=0;ss<osites;ss++){
std::vector<scalar_object> buf(Nsimd);
for (int m = 0; m < multiplicity; m++) { // Draw from same generator multiplicity times
std::vector<scalar_object> buf(Nsimd);
for(int m=0;m<multiplicity;m++) {// Draw from same generator multiplicity times
int sm = multiplicity * ss + m; // Maps the generator site to the fine site
int sm=multiplicity*ss+m; // Maps the generator site to the fine site
for(int si=0;si<Nsimd;si++){
int gdx = generator_idx(ss,si); // index of generator state
scalar_type *pointer = (scalar_type *)&buf[si];
dist[gdx].reset();
for(int idx=0;idx<words;idx++){
fillScalar(pointer[idx],dist[gdx],_generators[gdx]);
}
}
// merge into SIMD lanes
merge(l._odata[sm],buf);
}
for (int si = 0; si < Nsimd; si++) {
int gdx = generator_idx(ss, si); // index of generator state
scalar_type *pointer = (scalar_type *)&buf[si];
dist[gdx].reset();
for (int idx = 0; idx < words; idx++)
fillScalar(pointer[idx], dist[gdx], _generators[gdx]);
}
// merge into SIMD lanes, FIXME suboptimal implementation
merge(l._odata[sm], buf);
}
}
_time_counter += usecond()- inner_time_counter;
};
void SeedFixedIntegers(const std::vector<int> &seeds){
@ -412,6 +464,12 @@ namespace Grid {
}
#endif
}
void Report(){
std::cout << GridLogMessage << "Time spent in the fill() routine by GridParallelRNG: "<< _time_counter/1e3 << " ms" << std::endl;
}
////////////////////////////////////////////////////////////////////////
// Support for rigorous test of RNG's
// Return uniform random uint32_t from requested site generator
@ -419,7 +477,6 @@ namespace Grid {
uint32_t GlobalU01(int gsite){
uint32_t the_number;
// who
std::vector<int> gcoor;
int rank,o_idx,i_idx;

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -359,7 +359,7 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
template<class vobj>
void InsertSlice(Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog)
void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog)
{
typedef typename vobj::scalar_object sobj;
@ -401,7 +401,7 @@ void InsertSlice(Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int
}
template<class vobj>
void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, int orthog)
void ExtractSlice(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slice, int orthog)
{
typedef typename vobj::scalar_object sobj;
@ -444,7 +444,7 @@ void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, in
template<class vobj>
void InsertSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{
typedef typename vobj::scalar_object sobj;

View File

@ -30,6 +30,7 @@ directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/GridCore.h>
#include <Grid/util/CompilerCompatible.h>
#include <cxxabi.h>
#include <memory>

View File

@ -110,8 +110,8 @@ public:
friend std::ostream& operator<< (std::ostream& stream, Logger& log){
if ( log.active ) {
stream << log.background()<< std::setw(10) << std::left << log.topName << log.background()<< " : ";
stream << log.colour() << std::setw(14) << std::left << log.name << log.background() << " : ";
stream << log.background()<< std::setw(8) << std::left << log.topName << log.background()<< " : ";
stream << log.colour() << std::setw(10) << std::left << log.name << log.background() << " : ";
if ( log.timestamp ) {
StopWatch.Stop();
GridTime now = StopWatch.Elapsed();

View File

@ -6,8 +6,8 @@
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu<guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -30,6 +30,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define GRID_BINARY_IO_H
#include "IldgIOtypes.h"
#ifdef HAVE_ENDIAN_H
#include <endian.h>
#endif
@ -149,7 +151,48 @@ class BinaryIO {
csum=csum+buf[i];
}
}
// Simple classes for precision conversion
template <class fobj, class sobj>
struct BinarySimpleUnmunger {
typedef typename getPrecision<fobj>::real_scalar_type fobj_stype;
typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
void operator()(sobj &in, fobj &out, uint32_t &csum) {
// take word by word and transform accoding to the status
fobj_stype *out_buffer = (fobj_stype *)&out;
sobj_stype *in_buffer = (sobj_stype *)&in;
size_t fobj_words = sizeof(out) / sizeof(fobj_stype);
size_t sobj_words = sizeof(in) / sizeof(sobj_stype);
assert(fobj_words == sobj_words);
for (unsigned int word = 0; word < sobj_words; word++)
out_buffer[word] = in_buffer[word]; // type conversion on the fly
BinaryIO::Uint32Checksum((uint32_t *)&out, sizeof(out), csum);
}
};
template <class fobj, class sobj>
struct BinarySimpleMunger {
typedef typename getPrecision<fobj>::real_scalar_type fobj_stype;
typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
void operator()(fobj &in, sobj &out, uint32_t &csum) {
// take word by word and transform accoding to the status
fobj_stype *in_buffer = (fobj_stype *)&in;
sobj_stype *out_buffer = (sobj_stype *)&out;
size_t fobj_words = sizeof(in) / sizeof(fobj_stype);
size_t sobj_words = sizeof(out) / sizeof(sobj_stype);
assert(fobj_words == sobj_words);
for (unsigned int word = 0; word < sobj_words; word++)
out_buffer[word] = in_buffer[word]; // type conversion on the fly
BinaryIO::Uint32Checksum((uint32_t *)&in, sizeof(in), csum);
}
};
template<class vobj,class fobj,class munger>
static inline uint32_t readObjectSerial(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string &format)
{
@ -188,9 +231,9 @@ class BinaryIO {
fin.read((char *)&file_object, sizeof(file_object));assert( fin.fail()==0);
bytes += sizeof(file_object);
if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object));
if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object));
if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object));
if (ieee64big) be64toh_v((void *)&file_object, sizeof(file_object));
if (ieee64) le64toh_v((void *)&file_object, sizeof(file_object));
if (ieee64) le64toh_v((void *)&file_object, sizeof(file_object));
munge(file_object, munged, csum);
}
@ -209,7 +252,7 @@ class BinaryIO {
static inline uint32_t writeObjectSerial(Lattice<vobj> &Umu,std::string file,munger munge,int offset,
const std::string & format)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_object sobj;
GridBase *grid = Umu._grid;
@ -246,7 +289,6 @@ class BinaryIO {
if ( grid->IsBoss() ) {
if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
if(ieee32) htole32_v((void *)&file_object,sizeof(file_object));
if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
@ -270,24 +312,29 @@ class BinaryIO {
typedef typename GridSerialRNG::RngStateType RngStateType;
const int RngStateCount = GridSerialRNG::RngStateCount;
GridBase *grid = parallel._grid;
int gsites = grid->_gsites;
GridStopWatch timer; timer.Start();
//////////////////////////////////////////////////
// Serialise through node zero
//////////////////////////////////////////////////
std::cout<< GridLogMessage<< "Serial RNG write I/O "<< file<<std::endl;
std::ofstream fout;
if ( grid->IsBoss() ) {
fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
if (grid->IsBoss()) {
fout.open(file, std::ios::binary | std::ios::out);
if (!fout.is_open()) {
std::cout << GridLogMessage << "writeRNGSerial: Error opening file " << file << std::endl;
exit(0);// write better error handling
}
fout.seekp(offset);
}
uint32_t csum=0;
std::cout << GridLogMessage << "Serial RNG write I/O on file " << file << std::endl;
uint32_t csum = 0;
std::vector<RngStateType> saved(RngStateCount);
int bytes = sizeof(RngStateType)*saved.size();
int bytes = sizeof(RngStateType) * saved.size();
std::cout << GridLogDebug << "RngStateCount: " << RngStateCount << std::endl;
std::cout << GridLogDebug << "Type has " << bytes << " bytes" << std::endl;
std::vector<int> gcoor;
for(int gidx=0;gidx<gsites;gidx++){
@ -301,8 +348,7 @@ class BinaryIO {
// std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl;
parallel.GetState(saved,l_idx);
}
grid->Broadcast(rank,(void *)&saved[0],bytes);
grid->Broadcast(rank, (void *)&saved[0], bytes);
if ( grid->IsBoss() ) {
Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
@ -316,9 +362,20 @@ class BinaryIO {
Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
fout.write((char *)&saved[0],bytes);assert( fout.fail()==0);
}
grid->Broadcast(0,(void *)&csum,sizeof(csum));
grid->Broadcast(0, (void *)&csum, sizeof(csum));
if (grid->IsBoss())
fout.close();
timer.Stop();
std::cout << GridLogMessage << "RNG file checksum " << std::hex << csum << std::dec << std::endl;
std::cout << GridLogMessage << "RNG state saved in " << timer.Elapsed() << std::endl;
return csum;
}
static inline uint32_t readRNGSerial(GridSerialRNG &serial,GridParallelRNG &parallel,std::string file,int offset)
{
typedef typename GridSerialRNG::RngStateType RngStateType;
@ -330,34 +387,46 @@ class BinaryIO {
//////////////////////////////////////////////////
// Serialise through node zero
//////////////////////////////////////////////////
std::cout<< GridLogMessage<< "Serial RNG read I/O "<< file<<std::endl;
std::cout<< GridLogMessage<< "Serial RNG read I/O of file "<<file<<std::endl;
std::ifstream fin;
if (grid->IsBoss()) {
fin.open(file, std::ios::binary | std::ios::in);
if (!fin.is_open()) {
std::cout << GridLogMessage << "readRNGSerial: Error opening file " << file << std::endl;
exit(0);// write better error handling
}
fin.seekg(offset);
}
std::ifstream fin(file,std::ios::binary|std::ios::in);
fin.seekg(offset);
uint32_t csum=0;
std::vector<RngStateType> saved(RngStateCount);
int bytes = sizeof(RngStateType)*saved.size();
std::cout << GridLogDebug << "RngStateCount: " << RngStateCount << std::endl;
std::cout << GridLogDebug << "Type has " << bytes << " bytes" << std::endl;
std::vector<int> gcoor;
std::cout << GridLogDebug << "gsites: " << gsites << " loop" << std::endl;
for(int gidx=0;gidx<gsites;gidx++){
int rank,o_idx,i_idx;
grid->GlobalIndexToGlobalCoor(gidx,gcoor);
grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
int l_idx=parallel.generator_idx(o_idx,i_idx);
//std::cout << GridLogDebug << "l_idx " << l_idx << " o_idx " << o_idx
// << " i_idx " << i_idx << " rank " << rank << std::endl;
if ( grid->IsBoss() ) {
fin.read((char *)&saved[0],bytes);assert( fin.fail()==0);
Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
}
grid->Broadcast(0,(void *)&saved[0],bytes);
if( rank == grid->ThisRank() ){
parallel.SetState(saved,l_idx);
parallel.SetState(saved,l_idx);
}
}
if ( grid->IsBoss() ) {
@ -366,16 +435,21 @@ class BinaryIO {
Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
}
std::cout << GridLogMessage << "RNG file checksum " << std::hex << csum << std::dec << std::endl;
grid->Broadcast(0,(void *)&csum,sizeof(csum));
return csum;
}
template<class vobj,class fobj,class munger>
static inline uint32_t readObjectParallel(Lattice<vobj> &Umu,std::string file,munger munge,int offset,
const std::string &format)
{
template <class vobj, class fobj, class munger>
static inline uint32_t readObjectParallel(Lattice<vobj> &Umu,
std::string file,
munger munge,
int offset,
const std::string &format,
ILDGtype ILDG = ILDGtype()) {
typedef typename vobj::scalar_object sobj;
GridBase *grid = Umu._grid;
@ -441,9 +515,10 @@ class BinaryIO {
int myrank = grid->ThisRank();
int iorank = grid->RankFromProcessorCoor(ioproc);
if ( IOnode ) {
fin.open(file,std::ios::binary|std::ios::in);
}
if (!ILDG.is_ILDG)
if ( IOnode ) {
fin.open(file,std::ios::binary|std::ios::in);
}
//////////////////////////////////////////////////////////
// Find the location of each site and send to primary node
@ -451,13 +526,14 @@ class BinaryIO {
// available (how short sighted is that?)
//////////////////////////////////////////////////////////
Umu = zero;
static uint32_t csum; csum=0;
static uint32_t csum; csum=0;//static for SHMEM
fobj fileObj;
static sobj siteObj; // Static to place in symmetric region for SHMEM
// need to implement these loops in Nd independent way with a lexico conversion
for(int tlex=0;tlex<slice_vol;tlex++){
std::vector<int> tsite(nd); // temporary mixed up site
std::vector<int> gsite(nd);
std::vector<int> lsite(nd);
@ -470,6 +546,7 @@ class BinaryIO {
gsite[d] = tsite[d]+start[d]; // global site
}
/////////////////////////
// Get the rank of owner of data
/////////////////////////
@ -481,18 +558,28 @@ class BinaryIO {
// iorank reads from the seek
////////////////////////////////
if (myrank == iorank) {
fin.seekg(offset+g_idx*sizeof(fileObj));
fin.read((char *)&fileObj,sizeof(fileObj));assert( fin.fail()==0);
bytes+=sizeof(fileObj);
if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj));
munge(fileObj,siteObj,csum);
if (ILDG.is_ILDG){
// use C-LIME to populate the record
#ifdef HAVE_LIME
uint64_t sizeFO = sizeof(fileObj);
limeReaderSeek(ILDG.LR, g_idx*sizeFO, SEEK_SET);
int status = limeReaderReadData((void *)&fileObj, &sizeFO, ILDG.LR);
#endif
} else{
fin.seekg(offset+g_idx*sizeof(fileObj));
fin.read((char *)&fileObj,sizeof(fileObj));
}
bytes+=sizeof(fileObj);
if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj));
munge(fileObj,siteObj,csum);
}
// Possibly do transport through pt2pt
@ -515,32 +602,42 @@ class BinaryIO {
timer.Stop();
std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
return csum;
}
//////////////////////////////////////////////////////////
// Parallel writer
//////////////////////////////////////////////////////////
template<class vobj,class fobj,class munger>
static inline uint32_t writeObjectParallel(Lattice<vobj> &Umu,std::string file,munger munge,int offset,
const std::string & format)
{
template <class vobj, class fobj, class munger>
static inline uint32_t writeObjectParallel(Lattice<vobj> &Umu,
std::string file, munger munge,
int offset,
const std::string &format,
ILDGtype ILDG = ILDGtype()) {
typedef typename vobj::scalar_object sobj;
GridBase *grid = Umu._grid;
int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32"));
int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64"));
int ieee64 = (format == std::string("IEEE64"));
if (!(ieee32big || ieee32 || ieee64big || ieee64)) {
std::cout << GridLogError << "Unrecognized file format " << format
<< std::endl;
std::cout << GridLogError
<< "Allowed: IEEE32BIG | IEEE32 | IEEE64BIG | IEEE64"
<< std::endl;
exit(0);
}
int nd = grid->_ndimension;
for(int d=0;d<nd;d++){
for (int d = 0; d < nd; d++) {
assert(grid->CheckerBoarded(d) == 0);
}
std::vector<int> parallel(nd,1);
std::vector<int> ioproc (nd);
std::vector<int> parallel(nd, 1);
std::vector<int> ioproc(nd);
std::vector<int> start(nd);
std::vector<int> range(nd);
@ -548,9 +645,8 @@ class BinaryIO {
int IOnode = 1;
for(int d=0;d<grid->_ndimension;d++) {
if ( d!= grid->_ndimension-1 ) parallel[d] = 0;
for (int d = 0; d < grid->_ndimension; d++) {
if (d != grid->_ndimension - 1) parallel[d] = 0;
if (parallel[d]) {
range[d] = grid->_ldimensions[d];
@ -566,11 +662,12 @@ class BinaryIO {
slice_vol = slice_vol * range[d];
}
{
uint32_t tmp = IOnode;
grid->GlobalSum(tmp);
std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice ";
std::cout<< GridLogMessage<< "Parallel write I/O from "<< file
<< " with " <<tmp<< " IOnodes for subslice ";
for(int d=0;d<grid->_ndimension;d++){
std::cout<< range[d];
if( d< grid->_ndimension-1 )
@ -579,7 +676,8 @@ class BinaryIO {
std::cout << std::endl;
}
GridStopWatch timer; timer.Start();
GridStopWatch timer;
timer.Start();
uint64_t bytes=0;
int myrank = grid->ThisRank();
@ -590,48 +688,58 @@ class BinaryIO {
// Ideally one reader/writer per xy plane and read these contiguously
// with comms from nominated I/O nodes.
std::ofstream fout;
if ( IOnode ) fout.open(file,std::ios::binary|std::ios::in|std::ios::out);
if (!ILDG.is_ILDG)
if (IOnode){
fout.open(file, std::ios::binary | std::ios::in | std::ios::out);
if (!fout.is_open()) {
std::cout << GridLogMessage << "writeObjectParallel: Error opening file " << file
<< std::endl;
exit(0);
}
}
//////////////////////////////////////////////////////////
// Find the location of each site and send to primary node
// Take loop order from Chroma; defines loop order now that NERSC doc no longer
// Take loop order from Chroma; defines loop order now that NERSC doc no
// longer
// available (how short sighted is that?)
//////////////////////////////////////////////////////////
uint32_t csum=0;
uint32_t csum = 0;
fobj fileObj;
static sobj siteObj; // static for SHMEM target; otherwise dynamic allocate with AlignedAllocator
static sobj siteObj; // static for SHMEM target; otherwise dynamic allocate
// with AlignedAllocator
// should aggregate a whole chunk and then write.
// need to implement these loops in Nd independent way with a lexico conversion
for(int tlex=0;tlex<slice_vol;tlex++){
std::vector<int> tsite(nd); // temporary mixed up site
// need to implement these loops in Nd independent way with a lexico
// conversion
for (int tlex = 0; tlex < slice_vol; tlex++) {
std::vector<int> tsite(nd); // temporary mixed up site
std::vector<int> gsite(nd);
std::vector<int> lsite(nd);
std::vector<int> iosite(nd);
Lexicographic::CoorFromIndex(tsite,tlex,range);
Lexicographic::CoorFromIndex(tsite, tlex, range);
for(int d=0;d<nd;d++){
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
gsite[d] = tsite[d]+start[d]; // global site
for(int d = 0;d < nd; d++){
lsite[d] = tsite[d] % grid->_ldimensions[d]; // local site
gsite[d] = tsite[d] + start[d]; // global site
}
/////////////////////////
// Get the rank of owner of data
/////////////////////////
int rank, o_idx,i_idx, g_idx;
grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite);
grid->GlobalCoorToGlobalIndex(gsite,g_idx);
int rank, o_idx, i_idx, g_idx;
grid->GlobalCoorToRankIndex(rank, o_idx, i_idx, gsite);
grid->GlobalCoorToGlobalIndex(gsite, g_idx);
////////////////////////////////
// iorank writes from the seek
////////////////////////////////
// Owner of data peeks it
peekLocalSite(siteObj,Umu,lsite);
peekLocalSite(siteObj, Umu, lsite);
// Pair of nodes may need to do pt2pt send
if ( rank != iorank ) { // comms is necessary
@ -641,20 +749,30 @@ class BinaryIO {
}
}
grid->Barrier(); // necessary?
grid->Barrier(); // necessary?
if (myrank == iorank) {
munge(siteObj,fileObj,csum);
if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj));
if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj));
if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj));
if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj));
fout.seekp(offset+g_idx*sizeof(fileObj));
fout.write((char *)&fileObj,sizeof(fileObj));assert( fout.fail()==0);
bytes+=sizeof(fileObj);
munge(siteObj, fileObj, csum);
if (ieee32big) htobe32_v((void *)&fileObj, sizeof(fileObj));
if (ieee32) htole32_v((void *)&fileObj, sizeof(fileObj));
if (ieee64big) htobe64_v((void *)&fileObj, sizeof(fileObj));
if (ieee64) htole64_v((void *)&fileObj, sizeof(fileObj));
if (ILDG.is_ILDG) {
#ifdef HAVE_LIME
uint64_t sizeFO = sizeof(fileObj);
limeWriterSeek(ILDG.LW, g_idx*sizeFO, SEEK_SET);
int status = limeWriteRecordData((void *)&fileObj, &sizeFO, ILDG.LW);
#endif
}
else {
fout.seekp(offset + g_idx * sizeof(fileObj));
fout.write((char *)&fileObj, sizeof(fileObj));assert( fout.fail()==0);
}
bytes += sizeof(fileObj);
}
}
@ -662,14 +780,20 @@ class BinaryIO {
grid->GlobalSum(bytes);
timer.Stop();
std::cout<<GridLogPerformance<<"writeObjectParallel: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
std::cout << GridLogPerformance << "writeObjectParallel: wrote " << bytes
<< " bytes in " << timer.Elapsed() << " "
<< (double)bytes / timer.useconds() << " MB/s " << std::endl;
grid->Barrier(); // necessary?
if (IOnode)
fout.close();
return csum;
}
};
}
#endif

251
lib/parallelIO/IldgIO.h Normal file
View File

@ -0,0 +1,251 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/parallelIO/IldgIO.h
Copyright (C) 2015
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_ILDG_IO_H
#define GRID_ILDG_IO_H
#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <pwd.h>
#include <sys/utsname.h>
#include <unistd.h>
#ifdef HAVE_LIME
extern "C" { // for linkage
#include "lime.h"
}
namespace Grid {
namespace QCD {
inline void ILDGGrid(GridBase *grid, ILDGField &header) {
assert(grid->_ndimension == 4); // emit error if not
header.dimension.resize(4);
header.boundary.resize(4);
for (int d = 0; d < 4; d++) {
header.dimension[d] = grid->_fdimensions[d];
// Read boundary conditions from ... ?
header.boundary[d] = std::string("periodic");
}
}
inline void ILDGChecksum(uint32_t *buf, uint32_t buf_size_bytes,
uint32_t &csum) {
BinaryIO::Uint32Checksum(buf, buf_size_bytes, csum);
}
//////////////////////////////////////////////////////////////////////
// Utilities ; these are QCD aware
//////////////////////////////////////////////////////////////////////
template <class GaugeField>
inline void ILDGStatistics(GaugeField &data, ILDGField &header) {
// How to convert data precision etc...
header.link_trace = Grid::QCD::WilsonLoops<PeriodicGimplR>::linkTrace(data);
header.plaquette = Grid::QCD::WilsonLoops<PeriodicGimplR>::avgPlaquette(data);
// header.polyakov =
}
// Forcing QCD here
template <class fobj, class sobj>
struct ILDGMunger {
void operator()(fobj &in, sobj &out, uint32_t &csum) {
for (int mu = 0; mu < 4; mu++) {
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
out(mu)()(i, j) = in(mu)()(i, j);
}
}
}
ILDGChecksum((uint32_t *)&in, sizeof(in), csum);
};
};
template <class fobj, class sobj>
struct ILDGUnmunger {
void operator()(sobj &in, fobj &out, uint32_t &csum) {
for (int mu = 0; mu < 4; mu++) {
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
out(mu)()(i, j) = in(mu)()(i, j);
}
}
}
ILDGChecksum((uint32_t *)&out, sizeof(out), csum);
};
};
////////////////////////////////////////////////////////////////////////////////
// Write and read from fstream; compute header offset for payload
////////////////////////////////////////////////////////////////////////////////
enum ILDGstate {ILDGread, ILDGwrite};
class ILDGIO : public BinaryIO {
FILE *File;
LimeWriter *LimeW;
LimeRecordHeader *LimeHeader;
LimeReader *LimeR;
std::string filename;
public:
ILDGIO(std::string file, ILDGstate RW) {
filename = file;
if (RW == ILDGwrite){
File = fopen(file.c_str(), "w");
// check if opened correctly
LimeW = limeCreateWriter(File);
} else {
File = fopen(file.c_str(), "r");
// check if opened correctly
LimeR = limeCreateReader(File);
}
}
~ILDGIO() { fclose(File); }
int createHeader(std::string message, int MB, int ME, size_t PayloadSize, LimeWriter* L){
LimeRecordHeader *h;
h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize);
int status = limeWriteRecordHeader(h, L);
if (status < 0) {
std::cerr << "ILDG Header error\n";
return status;
}
limeDestroyHeader(h);
return LIME_SUCCESS;
}
unsigned int writeHeader(ILDGField &header) {
// write header in LIME
n_uint64_t nbytes;
int MB_flag = 1, ME_flag = 0;
char message[] = "ildg-format";
nbytes = strlen(message);
LimeHeader = limeCreateHeader(MB_flag, ME_flag, message, nbytes);
limeWriteRecordHeader(LimeHeader, LimeW);
limeDestroyHeader(LimeHeader);
// save the xml header here
// use the xml_writer to c++ streams in pugixml
// and convert to char message
limeWriteRecordData(message, &nbytes, LimeW);
limeWriterCloseRecord(LimeW);
return 0;
}
unsigned int readHeader(ILDGField &header) {
return 0;
}
template <class vsimd>
uint32_t readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu) {
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
typedef LorentzColourMatrixD sobjd;
typedef LorentzColourMatrixF sobjf;
typedef iLorentzColourMatrix<vsimd> itype;
typedef LorentzColourMatrix sobj;
GridBase *grid = Umu._grid;
ILDGField header;
readHeader(header);
// now just the conf, ignore the header
std::string format = std::string("IEEE64BIG");
do {limeReaderNextRecord(LimeR);}
while (strncmp(limeReaderType(LimeR), "ildg-binary-data",16));
n_uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
ILDGtype ILDGt(true, LimeR);
// this is special for double prec data, just for the moment
uint32_t csum = BinaryIO::readObjectParallel< itype, sobjd >(
Umu, filename, ILDGMunger<sobjd, sobj>(), 0, format, ILDGt);
// Check configuration
// todo
return csum;
}
template <class vsimd>
uint32_t writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, std::string format) {
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
typedef iLorentzColourMatrix<vsimd> vobj;
typedef typename vobj::scalar_object sobj;
typedef LorentzColourMatrixD fobj;
ILDGField header;
// fill the header
header.floating_point = format;
ILDGUnmunger<fobj, sobj> munge;
unsigned int offset = writeHeader(header);
BinaryIO::Uint32Checksum<vobj, fobj>(Umu, munge, header.checksum);
// Write data record header
n_uint64_t PayloadSize = sizeof(fobj) * Umu._grid->_gsites;
createHeader("ildg-binary-data", 0, 1, PayloadSize, LimeW);
ILDGtype ILDGt(true, LimeW);
uint32_t csum = BinaryIO::writeObjectParallel<vobj, fobj>(
Umu, filename, munge, 0, header.floating_point, ILDGt);
limeWriterCloseRecord(LimeW);
// Last record
// the logical file name LNF
// look into documentation on how to generate this string
std::string LNF = "empty";
PayloadSize = sizeof(LNF);
createHeader("ildg-binary-lfn", 1 , 1, PayloadSize, LimeW);
limeWriteRecordData(const_cast<char*>(LNF.c_str()), &PayloadSize, LimeW);
limeWriterCloseRecord(LimeW);
return csum;
}
// format for RNG? Now just binary out
};
}
}
//HAVE_LIME
#endif
#endif

View File

@ -0,0 +1,80 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/parallelIO/IldgIO.h
Copyright (C) 2015
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_ILDGTYPES_IO_H
#define GRID_ILDGTYPES_IO_H
#ifdef HAVE_LIME
extern "C" { // for linkage
#include "lime.h"
}
namespace Grid {
struct ILDGtype {
bool is_ILDG;
LimeWriter* LW;
LimeReader* LR;
ILDGtype(bool is, LimeWriter* L) : is_ILDG(is), LW(L), LR(NULL) {}
ILDGtype(bool is, LimeReader* L) : is_ILDG(is), LW(NULL), LR(L) {}
ILDGtype() : is_ILDG(false), LW(NULL), LR(NULL) {}
};
class ILDGField {
public:
// header strings (not in order)
std::vector<int> dimension;
std::vector<std::string> boundary;
int data_start;
std::string hdr_version;
std::string storage_format;
// Checks on data
double link_trace;
double plaquette;
uint32_t checksum;
unsigned int sequence_number;
std::string data_type;
std::string ensemble_id;
std::string ensemble_label;
std::string creator;
std::string creator_hardware;
std::string creation_date;
std::string archive_date;
std::string floating_point;
};
}
#else
namespace Grid {
struct ILDGtype {
bool is_ILDG;
ILDGtype() : is_ILDG(false) {}
};
}
#endif
#endif

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -6,9 +6,9 @@
Copyright (C) 2015
Author: Matt Spraggs <matthew.spraggs@gmail.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Matt Spraggs <matthew.spraggs@gmail.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -25,8 +25,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_NERSC_IO_H
#define GRID_NERSC_IO_H
@ -41,92 +41,92 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <pwd.h>
namespace Grid {
namespace QCD {
namespace QCD {
using namespace Grid;
using namespace Grid;
////////////////////////////////////////////////////////////////////////////////
// Some data types for intermediate storage
////////////////////////////////////////////////////////////////////////////////
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, 4 >;
////////////////////////////////////////////////////////////////////////////////
// Some data types for intermediate storage
////////////////////////////////////////////////////////////////////////////////
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, 4 >;
typedef iLorentzColour2x3<Complex> LorentzColour2x3;
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
typedef iLorentzColour2x3<Complex> LorentzColour2x3;
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
////////////////////////////////////////////////////////////////////////////////
// header specification/interpretation
////////////////////////////////////////////////////////////////////////////////
class NerscField {
public:
// header strings (not in order)
int dimension[4];
std::string boundary[4];
int data_start;
std::string hdr_version;
std::string storage_format;
// Checks on data
double link_trace;
double plaquette;
uint32_t checksum;
unsigned int sequence_number;
std::string data_type;
std::string ensemble_id ;
std::string ensemble_label ;
std::string creator ;
std::string creator_hardware ;
std::string creation_date ;
std::string archive_date ;
std::string floating_point;
};
////////////////////////////////////////////////////////////////////////////////
// header specification/interpretation
////////////////////////////////////////////////////////////////////////////////
class NerscField {
public:
// header strings (not in order)
int dimension[4];
std::string boundary[4];
int data_start;
std::string hdr_version;
std::string storage_format;
// Checks on data
double link_trace;
double plaquette;
uint32_t checksum;
unsigned int sequence_number;
std::string data_type;
std::string ensemble_id ;
std::string ensemble_label ;
std::string creator ;
std::string creator_hardware ;
std::string creation_date ;
std::string archive_date ;
std::string floating_point;
};
//////////////////////////////////////////////////////////////////////
// Bit and Physical Checksumming and QA of data
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
// Bit and Physical Checksumming and QA of data
//////////////////////////////////////////////////////////////////////
inline void NerscGrid(GridBase *grid,NerscField &header)
{
assert(grid->_ndimension==4);
for(int d=0;d<4;d++) {
header.dimension[d] = grid->_fdimensions[d];
}
for(int d=0;d<4;d++) {
header.boundary[d] = std::string("PERIODIC");
}
}
template<class GaugeField>
inline void NerscStatistics(GaugeField & data,NerscField &header)
{
// How to convert data precision etc...
header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplR>::linkTrace(data);
header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplR>::avgPlaquette(data);
}
inline void NerscGrid(GridBase *grid,NerscField &header)
{
assert(grid->_ndimension==4);
for(int d=0;d<4;d++) {
header.dimension[d] = grid->_fdimensions[d];
}
for(int d=0;d<4;d++) {
header.boundary[d] = std::string("PERIODIC");
}
}
template<class GaugeField>
inline void NerscStatistics(GaugeField & data,NerscField &header)
{
// How to convert data precision etc...
header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplR>::linkTrace(data);
header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplR>::avgPlaquette(data);
}
inline void NerscMachineCharacteristics(NerscField &header)
{
// Who
struct passwd *pw = getpwuid (getuid());
if (pw) header.creator = std::string(pw->pw_name);
inline void NerscMachineCharacteristics(NerscField &header)
{
// Who
struct passwd *pw = getpwuid (getuid());
if (pw) header.creator = std::string(pw->pw_name);
// When
std::time_t t = std::time(nullptr);
std::tm tm = *std::localtime(&t);
std::ostringstream oss;
// oss << std::put_time(&tm, "%c %Z");
header.creation_date = oss.str();
header.archive_date = header.creation_date;
// When
std::time_t t = std::time(nullptr);
std::tm tm = *std::localtime(&t);
std::ostringstream oss;
// oss << std::put_time(&tm, "%c %Z");
header.creation_date = oss.str();
header.archive_date = header.creation_date;
// What
struct utsname name; uname(&name);
header.creator_hardware = std::string(name.nodename)+"-";
header.creator_hardware+= std::string(name.machine)+"-";
header.creator_hardware+= std::string(name.sysname)+"-";
header.creator_hardware+= std::string(name.release);
// What
struct utsname name; uname(&name);
header.creator_hardware = std::string(name.nodename)+"-";
header.creator_hardware+= std::string(name.machine)+"-";
header.creator_hardware+= std::string(name.sysname)+"-";
header.creator_hardware+= std::string(name.release);
}
//////////////////////////////////////////////////////////////////////
// Utilities ; these are QCD aware
//////////////////////////////////////////////////////////////////////
}
//////////////////////////////////////////////////////////////////////
// Utilities ; these are QCD aware
//////////////////////////////////////////////////////////////////////
inline void NerscChecksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum)
{
BinaryIO::Uint32Checksum(buf,buf_size_bytes,csum);
@ -145,30 +145,32 @@ inline void NerscMachineCharacteristics(NerscField &header)
template<class fobj,class sobj>
struct NerscSimpleMunger{
void operator() (fobj &in,sobj &out,uint32_t &csum){
for(int mu=0;mu<4;mu++){
for(int i=0;i<3;i++){
for(int j=0;j<3;j++){
out(mu)()(i,j) = in(mu)()(i,j);
}}}
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
void operator()(fobj &in, sobj &out, uint32_t &csum) {
for (int mu = 0; mu < Nd; mu++) {
for (int i = 0; i < Nc; i++) {
for (int j = 0; j < Nc; j++) {
out(mu)()(i, j) = in(mu)()(i, j);
}
}
}
NerscChecksum((uint32_t *)&in, sizeof(in), csum);
};
};
template<class fobj,class sobj>
struct NerscSimpleUnmunger{
void operator() (sobj &in,fobj &out,uint32_t &csum){
for(int mu=0;mu<Nd;mu++){
for(int i=0;i<Nc;i++){
for(int j=0;j<Nc;j++){
out(mu)()(i,j) = in(mu)()(i,j);
}}}
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
template <class fobj, class sobj>
struct NerscSimpleUnmunger {
void operator()(sobj &in, fobj &out, uint32_t &csum) {
for (int mu = 0; mu < Nd; mu++) {
for (int i = 0; i < Nc; i++) {
for (int j = 0; j < Nc; j++) {
out(mu)()(i, j) = in(mu)()(i, j);
}
}
}
NerscChecksum((uint32_t *)&out, sizeof(out), csum);
};
};
template<class fobj,class sobj>
struct Nersc3x2munger{
void operator() (fobj &in,sobj &out,uint32_t &csum){
@ -204,74 +206,74 @@ inline void NerscMachineCharacteristics(NerscField &header)
};
////////////////////////////////////////////////////////////////////////////////
// Write and read from fstream; comput header offset for payload
////////////////////////////////////////////////////////////////////////////////
class NerscIO : public BinaryIO {
public:
////////////////////////////////////////////////////////////////////////////////
// Write and read from fstream; comput header offset for payload
////////////////////////////////////////////////////////////////////////////////
class NerscIO : public BinaryIO {
public:
static inline void truncate(std::string file){
std::ofstream fout(file,std::ios::out);
}
static inline void truncate(std::string file){
std::ofstream fout(file,std::ios::out);
}
#define dump_nersc_header(field, s)\
s << "BEGIN_HEADER" << std::endl;\
s << "HDR_VERSION = " << field.hdr_version << std::endl;\
s << "DATATYPE = " << field.data_type << std::endl;\
s << "STORAGE_FORMAT = " << field.storage_format << std::endl;\
for(int i=0;i<4;i++){\
s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;\
}\
s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;\
s << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl;\
for(int i=0;i<4;i++){\
s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;\
}\
\
s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl;\
s << "ENSEMBLE_ID = " << field.ensemble_id << std::endl;\
s << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl;\
s << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl;\
s << "CREATOR = " << field.creator << std::endl;\
s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;\
s << "CREATION_DATE = " << field.creation_date << std::endl;\
s << "ARCHIVE_DATE = " << field.archive_date << std::endl;\
s << "FLOATING_POINT = " << field.floating_point << std::endl;\
s << "END_HEADER" << std::endl;
#define dump_nersc_header(field, s) \
s << "BEGIN_HEADER" << std::endl; \
s << "HDR_VERSION = " << field.hdr_version << std::endl; \
s << "DATATYPE = " << field.data_type << std::endl; \
s << "STORAGE_FORMAT = " << field.storage_format << std::endl; \
for(int i=0;i<4;i++){ \
s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \
} \
s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \
s << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl; \
for(int i=0;i<4;i++){ \
s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl; \
} \
\
s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl; \
s << "ENSEMBLE_ID = " << field.ensemble_id << std::endl; \
s << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl; \
s << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl; \
s << "CREATOR = " << field.creator << std::endl; \
s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl; \
s << "CREATION_DATE = " << field.creation_date << std::endl; \
s << "ARCHIVE_DATE = " << field.archive_date << std::endl; \
s << "FLOATING_POINT = " << field.floating_point << std::endl; \
s << "END_HEADER" << std::endl;
static inline unsigned int writeHeader(NerscField &field,std::string file)
{
std::ofstream fout(file,std::ios::out|std::ios::in);
fout.seekp(0,std::ios::beg);
dump_nersc_header(field, fout);
field.data_start = fout.tellp();
return field.data_start;
}
static inline unsigned int writeHeader(NerscField &field,std::string file)
{
std::ofstream fout(file,std::ios::out|std::ios::in);
fout.seekp(0,std::ios::beg);
dump_nersc_header(field, fout);
field.data_start = fout.tellp();
return field.data_start;
}
// for the header-reader
static inline int readHeader(std::string file,GridBase *grid, NerscField &field)
{
int offset=0;
std::map<std::string,std::string> header;
std::string line;
// for the header-reader
static inline int readHeader(std::string file,GridBase *grid, NerscField &field)
{
int offset=0;
std::map<std::string,std::string> header;
std::string line;
//////////////////////////////////////////////////
// read the header
//////////////////////////////////////////////////
std::ifstream fin(file);
//////////////////////////////////////////////////
// read the header
//////////////////////////////////////////////////
std::ifstream fin(file);
getline(fin,line); // read one line and insist is
getline(fin,line); // read one line and insist is
removeWhitespace(line);
std::cout << GridLogMessage << "* " << line << std::endl;
removeWhitespace(line);
std::cout << GridLogMessage << "* " << line << std::endl;
assert(line==std::string("BEGIN_HEADER"));
assert(line==std::string("BEGIN_HEADER"));
do {
getline(fin,line); // read one line
std::cout << GridLogMessage << "* "<<line<< std::endl;
int eq = line.find("=");
if(eq >0) {
do {
getline(fin,line); // read one line
std::cout << GridLogMessage << "* "<<line<< std::endl;
int eq = line.find("=");
if(eq >0) {
std::string key=line.substr(0,eq);
std::string val=line.substr(eq+1);
removeWhitespace(key);
@ -279,275 +281,272 @@ static inline int readHeader(std::string file,GridBase *grid, NerscField &field
header[key] = val;
}
} while( line.find("END_HEADER") == std::string::npos );
} while( line.find("END_HEADER") == std::string::npos );
field.data_start = fin.tellg();
field.data_start = fin.tellg();
//////////////////////////////////////////////////
// chomp the values
//////////////////////////////////////////////////
field.hdr_version = header["HDR_VERSION"];
field.data_type = header["DATATYPE"];
field.storage_format = header["STORAGE_FORMAT"];
//////////////////////////////////////////////////
// chomp the values
//////////////////////////////////////////////////
field.hdr_version = header["HDR_VERSION"];
field.data_type = header["DATATYPE"];
field.storage_format = header["STORAGE_FORMAT"];
field.dimension[0] = std::stol(header["DIMENSION_1"]);
field.dimension[1] = std::stol(header["DIMENSION_2"]);
field.dimension[2] = std::stol(header["DIMENSION_3"]);
field.dimension[3] = std::stol(header["DIMENSION_4"]);
field.dimension[0] = std::stol(header["DIMENSION_1"]);
field.dimension[1] = std::stol(header["DIMENSION_2"]);
field.dimension[2] = std::stol(header["DIMENSION_3"]);
field.dimension[3] = std::stol(header["DIMENSION_4"]);
assert(grid->_ndimension == 4);
for(int d=0;d<4;d++){
assert(grid->_fdimensions[d]==field.dimension[d]);
}
assert(grid->_ndimension == 4);
for(int d=0;d<4;d++){
assert(grid->_fdimensions[d]==field.dimension[d]);
}
field.link_trace = std::stod(header["LINK_TRACE"]);
field.plaquette = std::stod(header["PLAQUETTE"]);
field.link_trace = std::stod(header["LINK_TRACE"]);
field.plaquette = std::stod(header["PLAQUETTE"]);
field.boundary[0] = header["BOUNDARY_1"];
field.boundary[1] = header["BOUNDARY_2"];
field.boundary[2] = header["BOUNDARY_3"];
field.boundary[3] = header["BOUNDARY_4"];
field.boundary[0] = header["BOUNDARY_1"];
field.boundary[1] = header["BOUNDARY_2"];
field.boundary[2] = header["BOUNDARY_3"];
field.boundary[3] = header["BOUNDARY_4"];
field.checksum = std::stoul(header["CHECKSUM"],0,16);
field.ensemble_id = header["ENSEMBLE_ID"];
field.ensemble_label = header["ENSEMBLE_LABEL"];
field.sequence_number = std::stol(header["SEQUENCE_NUMBER"]);
field.creator = header["CREATOR"];
field.creator_hardware = header["CREATOR_HARDWARE"];
field.creation_date = header["CREATION_DATE"];
field.archive_date = header["ARCHIVE_DATE"];
field.floating_point = header["FLOATING_POINT"];
field.checksum = std::stoul(header["CHECKSUM"],0,16);
field.ensemble_id = header["ENSEMBLE_ID"];
field.ensemble_label = header["ENSEMBLE_LABEL"];
field.sequence_number = std::stol(header["SEQUENCE_NUMBER"]);
field.creator = header["CREATOR"];
field.creator_hardware = header["CREATOR_HARDWARE"];
field.creation_date = header["CREATION_DATE"];
field.archive_date = header["ARCHIVE_DATE"];
field.floating_point = header["FLOATING_POINT"];
return field.data_start;
}
return field.data_start;
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Now the meat: the object readers
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Now the meat: the object readers
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define PARALLEL_READ
#define PARALLEL_WRITE
template<class vsimd>
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,NerscField& header,std::string file)
{
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
template<class vsimd>
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,NerscField& header,std::string file)
{
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
GridBase *grid = Umu._grid;
int offset = readHeader(file,Umu._grid,header);
GridBase *grid = Umu._grid;
int offset = readHeader(file,Umu._grid,header);
NerscField clone(header);
NerscField clone(header);
std::string format(header.floating_point);
std::string format(header.floating_point);
int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64"));
int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64"));
uint32_t csum;
// depending on datatype, set up munger;
// munger is a function of <floating point, Real, data_type>
if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
if ( ieee32 || ieee32big ) {
uint32_t csum;
// depending on datatype, set up munger;
// munger is a function of <floating point, Real, data_type>
if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
if ( ieee32 || ieee32big ) {
#ifdef PARALLEL_READ
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
(Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format);
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
(Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format);
#else
csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
(Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format);
csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
(Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format);
#endif
}
if ( ieee64 || ieee64big ) {
}
if ( ieee64 || ieee64big ) {
#ifdef PARALLEL_READ
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
(Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format);
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
(Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format);
#else
csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
(Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format);
csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
(Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format);
#endif
}
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) {
if ( ieee32 || ieee32big ) {
}
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) {
if ( ieee32 || ieee32big ) {
#ifdef PARALLEL_READ
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
(Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format);
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
(Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format);
#else
csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
(Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format);
csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
(Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format);
#endif
}
if ( ieee64 || ieee64big ) {
}
if ( ieee64 || ieee64big ) {
#ifdef PARALLEL_READ
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
(Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format);
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
(Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format);
#else
csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
(Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format);
csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
(Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format);
#endif
}
} else {
assert(0);
}
}
} else {
assert(0);
}
NerscStatistics<GaugeField>(Umu,clone);
NerscStatistics<GaugeField>(Umu,clone);
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<< csum<< std::dec
<<" header "<<std::hex<<header.checksum<<std::dec <<std::endl;
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" plaquette "<<clone.plaquette
<<" header "<<header.plaquette<<std::endl;
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace
<<" header "<<header.link_trace<<std::endl;
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
assert(csum == header.checksum );
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<< csum<< std::dec
<<" header "<<std::hex<<header.checksum<<std::dec <<std::endl;
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" plaquette "<<clone.plaquette
<<" header "<<header.plaquette<<std::endl;
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace
<<" header "<<header.link_trace<<std::endl;
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
assert(csum == header.checksum );
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
}
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
}
template<class vsimd>
static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,std::string file, int two_row,int bits32)
{
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
template<class vsimd>
static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,std::string file, int two_row,int bits32)
{
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
typedef iLorentzColourMatrix<vsimd> vobj;
typedef typename vobj::scalar_object sobj;
typedef iLorentzColourMatrix<vsimd> vobj;
typedef typename vobj::scalar_object sobj;
// Following should become arguments
NerscField header;
header.sequence_number = 1;
header.ensemble_id = "UKQCD";
header.ensemble_label = "DWF";
// Following should become arguments
NerscField header;
header.sequence_number = 1;
header.ensemble_id = "UKQCD";
header.ensemble_label = "DWF";
typedef LorentzColourMatrixD fobj3D;
typedef LorentzColour2x3D fobj2D;
typedef LorentzColourMatrixF fobj3f;
typedef LorentzColour2x3F fobj2f;
GridBase *grid = Umu._grid;
NerscGrid(grid,header);
NerscStatistics<GaugeField>(Umu,header);
NerscMachineCharacteristics(header);
uint32_t csum;
int offset;
typedef LorentzColourMatrixD fobj3D;
typedef LorentzColour2x3D fobj2D;
truncate(file);
GridBase *grid = Umu._grid;
if ( two_row ) {
NerscGrid(grid,header);
NerscStatistics<GaugeField>(Umu,header);
NerscMachineCharacteristics(header);
header.floating_point = std::string("IEEE64BIG");
header.data_type = std::string("4D_SU3_GAUGE");
Nersc3x2unmunger<fobj2D,sobj> munge;
BinaryIO::Uint32Checksum<vobj,fobj2D>(Umu, munge,header.checksum);
offset = writeHeader(header,file);
uint32_t csum;
int offset;
truncate(file);
if ( two_row ) {
header.floating_point = std::string("IEEE64BIG");
header.data_type = std::string("4D_SU3_GAUGE");
Nersc3x2unmunger<fobj2D,sobj> munge;
BinaryIO::Uint32Checksum<vobj,fobj2D>(Umu, munge,header.checksum);
offset = writeHeader(header,file);
#ifdef PARALLEL_WRITE
csum=BinaryIO::writeObjectParallel<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point);
csum=BinaryIO::writeObjectParallel<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point);
#else
csum=BinaryIO::writeObjectSerial<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point);
csum=BinaryIO::writeObjectSerial<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point);
#endif
} else {
header.floating_point = std::string("IEEE64BIG");
header.data_type = std::string("4D_SU3_GAUGE_3x3");
NerscSimpleUnmunger<fobj3D,sobj> munge;
BinaryIO::Uint32Checksum<vobj,fobj3D>(Umu, munge,header.checksum);
offset = writeHeader(header,file);
} else {
header.floating_point = std::string("IEEE64BIG");
header.data_type = std::string("4D_SU3_GAUGE_3x3");
NerscSimpleUnmunger<fobj3D,sobj> munge;
BinaryIO::Uint32Checksum<vobj,fobj3D>(Umu, munge,header.checksum);
offset = writeHeader(header,file);
#ifdef PARALLEL_WRITE
csum=BinaryIO::writeObjectParallel<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point);
csum=BinaryIO::writeObjectParallel<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point);
#else
csum=BinaryIO::writeObjectSerial<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point);
csum=BinaryIO::writeObjectSerial<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point);
#endif
}
}
std::cout<<GridLogMessage <<"Written NERSC Configuration "<<file<< " checksum "<<std::hex<<csum<< std::dec<<" plaq "<< header.plaquette <<std::endl;
std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "<<std::hex<<csum<< std::dec<<" plaq "<< header.plaquette <<std::endl;
}
}
///////////////////////////////
// RNG state
///////////////////////////////
static inline void writeRNGState(GridSerialRNG &serial,GridParallelRNG &parallel,std::string file)
{
typedef typename GridParallelRNG::RngStateType RngStateType;
///////////////////////////////
// RNG state
///////////////////////////////
static inline void writeRNGState(GridSerialRNG &serial,GridParallelRNG &parallel,std::string file)
{
typedef typename GridParallelRNG::RngStateType RngStateType;
// Following should become arguments
NerscField header;
header.sequence_number = 1;
header.ensemble_id = "UKQCD";
header.ensemble_label = "DWF";
// Following should become arguments
NerscField header;
header.sequence_number = 1;
header.ensemble_id = "UKQCD";
header.ensemble_label = "DWF";
GridBase *grid = parallel._grid;
GridBase *grid = parallel._grid;
NerscGrid(grid,header);
header.link_trace=0.0;
header.plaquette=0.0;
NerscMachineCharacteristics(header);
NerscGrid(grid,header);
header.link_trace=0.0;
header.plaquette=0.0;
NerscMachineCharacteristics(header);
uint32_t csum;
int offset;
uint32_t csum;
int offset;
#ifdef RNG_RANLUX
header.floating_point = std::string("UINT64");
header.data_type = std::string("RANLUX48");
header.floating_point = std::string("UINT64");
header.data_type = std::string("RANLUX48");
#endif
#ifdef RNG_MT19937
header.floating_point = std::string("UINT32");
header.data_type = std::string("MT19937");
header.floating_point = std::string("UINT32");
header.data_type = std::string("MT19937");
#endif
#ifdef RNG_SITMO
header.floating_point = std::string("UINT64");
header.data_type = std::string("SITMO");
header.floating_point = std::string("UINT64");
header.data_type = std::string("SITMO");
#endif
truncate(file);
offset = writeHeader(header,file);
csum=BinaryIO::writeRNGSerial(serial,parallel,file,offset);
header.checksum = csum;
offset = writeHeader(header,file);
truncate(file);
offset = writeHeader(header,file);
csum=BinaryIO::writeRNGSerial(serial,parallel,file,offset);
header.checksum = csum;
offset = writeHeader(header,file);
std::cout<<GridLogMessage <<"Written NERSC RNG STATE "<<file<< " checksum "<<std::hex<<csum<<std::dec<<std::endl;
std::cout<<GridLogMessage <<"Written NERSC RNG STATE "<<file<< " checksum "<<std::hex<<csum<<std::dec<<std::endl;
}
}
static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,NerscField& header,std::string file)
{
typedef typename GridParallelRNG::RngStateType RngStateType;
static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,NerscField& header,std::string file)
{
typedef typename GridParallelRNG::RngStateType RngStateType;
GridBase *grid = parallel._grid;
GridBase *grid = parallel._grid;
int offset = readHeader(file,grid,header);
int offset = readHeader(file,grid,header);
NerscField clone(header);
NerscField clone(header);
std::string format(header.floating_point);
std::string data_type(header.data_type);
std::string format(header.floating_point);
std::string data_type(header.data_type);
#ifdef RNG_RANLUX
assert(format == std::string("UINT64"));
assert(data_type == std::string("RANLUX48"));
assert(format == std::string("UINT64"));
assert(data_type == std::string("RANLUX48"));
#endif
#ifdef RNG_MT19937
assert(format == std::string("UINT32"));
assert(data_type == std::string("MT19937"));
assert(format == std::string("UINT32"));
assert(data_type == std::string("MT19937"));
#endif
#ifdef RNG_SITMO
assert(format == std::string("UINT64"));
assert(data_type == std::string("SITMO"));
assert(format == std::string("UINT64"));
assert(data_type == std::string("SITMO"));
#endif
// depending on datatype, set up munger;
// munger is a function of <floating point, Real, data_type>
uint32_t csum=BinaryIO::readRNGSerial(serial,parallel,file,offset);
// depending on datatype, set up munger;
// munger is a function of <floating point, Real, data_type>
uint32_t csum=BinaryIO::readRNGSerial(serial,parallel,file,offset);
assert(csum == header.checksum );
assert(csum == header.checksum );
std::cout<<GridLogMessage <<"Read NERSC RNG file "<<file<< " format "<< data_type <<std::endl;
}
std::cout<<GridLogMessage <<"Read NERSC RNG file "<<file<< " format "<< data_type <<std::endl;
}
};
};
}}
}}
#endif

View File

@ -205,13 +205,14 @@ public:
void Stop(void) {
count=0;
cycles=0;
size_t ign;
#ifdef __linux__
ssize_t ign;
if ( fd!= -1) {
::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0);
ign=::read(fd, &count, sizeof(long long));
ign=::read(cyclefd, &cycles, sizeof(long long));
ign+=::read(cyclefd, &cycles, sizeof(long long));
assert(ign=2*sizeof(long long));
}
elapsed = cyclecount() - begin;
#else

124
lib/qcd/LatticeTheories.h Normal file
View File

@ -0,0 +1,124 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/QCD.h
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_LT_H
#define GRID_LT_H
namespace Grid{
// First steps in the complete generalization of the Physics part
// Design not final
namespace LatticeTheories {
template <int Dimensions>
struct LatticeTheory {
static const int Nd = Dimensions;
static const int Nds = Dimensions * 2; // double stored field
template <typename vtype>
using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
};
template <int Dimensions, int Colours>
struct LatticeGaugeTheory : public LatticeTheory<Dimensions> {
static const int Nds = Dimensions * 2;
static const int Nd = Dimensions;
static const int Nc = Colours;
template <typename vtype>
using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > >;
template <typename vtype>
using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd>;
template <typename vtype>
using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds>;
template <typename vtype>
using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
};
template <int Dimensions, int Colours, int Spin>
struct FermionicLatticeGaugeTheory
: public LatticeGaugeTheory<Dimensions, Colours> {
static const int Nd = Dimensions;
static const int Nds = Dimensions * 2;
static const int Nc = Colours;
static const int Ns = Spin;
template <typename vtype>
using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
template <typename vtype>
using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template <typename vtype>
using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template <typename vtype>
using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
// These 2 only if Spin is a multiple of 2
static const int Nhs = Spin / 2;
template <typename vtype>
using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
template <typename vtype>
using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
//tests
typedef iColourMatrix<Complex> ColourMatrix;
typedef iColourMatrix<ComplexF> ColourMatrixF;
typedef iColourMatrix<ComplexD> ColourMatrixD;
};
// Examples, not complete now.
struct QCD : public FermionicLatticeGaugeTheory<4, 3, 4> {
static const int Xp = 0;
static const int Yp = 1;
static const int Zp = 2;
static const int Tp = 3;
static const int Xm = 4;
static const int Ym = 5;
static const int Zm = 6;
static const int Tm = 7;
typedef FermionicLatticeGaugeTheory FLGT;
typedef FLGT::iSpinMatrix<Complex > SpinMatrix;
typedef FLGT::iSpinMatrix<ComplexF > SpinMatrixF;
typedef FLGT::iSpinMatrix<ComplexD > SpinMatrixD;
};
struct QED : public FermionicLatticeGaugeTheory<4, 1, 4> {//fill
};
template <int Dimensions>
struct Scalar : public LatticeTheory<Dimensions> {};
}; // LatticeTheories
} // Grid
#endif

View File

@ -32,9 +32,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_QCD_BASE_H
#define GRID_QCD_BASE_H
namespace Grid{
namespace QCD {
static const int Xdir = 0;
static const int Ydir = 1;
static const int Zdir = 2;
static const int Tdir = 3;
static const int Xp = 0;
static const int Yp = 1;
@ -354,36 +357,36 @@ namespace QCD {
//////////////////////////////////////////////
template<class vobj>
void pokeColour(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0))> & rhs,
int i)
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0))> & rhs,
int i)
{
PokeIndex<ColourIndex>(lhs,rhs,i);
}
template<class vobj>
void pokeColour(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0,0))> & rhs,
int i,int j)
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0,0))> & rhs,
int i,int j)
{
PokeIndex<ColourIndex>(lhs,rhs,i,j);
}
template<class vobj>
void pokeSpin(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0))> & rhs,
int i)
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0))> & rhs,
int i)
{
PokeIndex<SpinIndex>(lhs,rhs,i);
}
template<class vobj>
void pokeSpin(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0,0))> & rhs,
int i,int j)
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0,0))> & rhs,
int i,int j)
{
PokeIndex<SpinIndex>(lhs,rhs,i,j);
}
template<class vobj>
void pokeLorentz(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<LorentzIndex>(lhs._odata[0],0))> & rhs,
int i)
const Lattice<decltype(peekIndex<LorentzIndex>(lhs._odata[0],0))> & rhs,
int i)
{
PokeIndex<LorentzIndex>(lhs,rhs,i);
}
@ -492,6 +495,38 @@ namespace QCD {
} //namespace QCD
} // Grid
/*
<<<<<<< HEAD
#include <Grid/qcd/utils/SpaceTimeGrid.h>
#include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/spin/TwoSpinor.h>
#include <Grid/qcd/utils/LinalgUtils.h>
#include <Grid/qcd/utils/CovariantCshift.h>
// Include representations
#include <Grid/qcd/utils/SUn.h>
#include <Grid/qcd/utils/SUnAdjoint.h>
#include <Grid/qcd/utils/SUnTwoIndex.h>
#include <Grid/qcd/representations/hmc_types.h>
// Scalar field
#include <Grid/qcd/utils/ScalarObjs.h>
#include <Grid/qcd/action/Actions.h>
#include <Grid/qcd/smearing/Smearing.h>
#include <Grid/qcd/hmc/integrators/Integrator.h>
#include <Grid/qcd/hmc/integrators/Integrator_algorithm.h>
#include <Grid/qcd/observables/hmc_observable.h>
#include <Grid/qcd/hmc/HMC.h>
//#include <Grid/qcd/modules/mods.h>
=======
>>>>>>> develop
*/
#endif

View File

@ -4,10 +4,11 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/ActionBase.h
Copyright (C) 2015
Copyright (C) 2015-2016
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -27,128 +28,29 @@ See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef QCD_ACTION_BASE
#define QCD_ACTION_BASE
#ifndef ACTION_BASE_H
#define ACTION_BASE_H
namespace Grid {
namespace QCD {
template <class GaugeField>
class Action {
template <class GaugeField >
class Action
{
public:
bool is_smeared = false;
// Boundary conditions? // Heatbath?
virtual void refresh(const GaugeField& U,
GridParallelRNG& pRNG) = 0; // refresh pseudofermions
virtual RealD S(const GaugeField& U) = 0; // evaluate the action
virtual void deriv(const GaugeField& U,
GaugeField& dSdU) = 0; // evaluate the action derivative
virtual ~Action(){};
// Heatbath?
virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG) = 0; // refresh pseudofermions
virtual RealD S(const GaugeField& U) = 0; // evaluate the action
virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0; // evaluate the action derivative
virtual std::string action_name() = 0; // return the action name
virtual std::string LogParameters() = 0; // prints action parameters
virtual ~Action(){}
};
// Indexing of tuple types
template <class T, class Tuple>
struct Index;
template <class T, class... Types>
struct Index<T, std::tuple<T, Types...>> {
static const std::size_t value = 0;
};
template <class T, class U, class... Types>
struct Index<T, std::tuple<U, Types...>> {
static const std::size_t value = 1 + Index<T, std::tuple<Types...>>::value;
};
/*
template <class GaugeField>
struct ActionLevel {
public:
typedef Action<GaugeField>*
ActPtr; // now force the same colours as the rest of the code
//Add supported representations here
unsigned int multiplier;
std::vector<ActPtr> actions;
ActionLevel(unsigned int mul = 1) : actions(0), multiplier(mul) {
assert(mul >= 1);
};
void push_back(ActPtr ptr) { actions.push_back(ptr); }
};
*/
template <class GaugeField, class Repr = NoHirep >
struct ActionLevel {
public:
unsigned int multiplier;
// Fundamental repr actions separated because of the smearing
typedef Action<GaugeField>* ActPtr;
// construct a tuple of vectors of the actions for the corresponding higher
// representation fields
typedef typename AccessTypes<Action, Repr>::VectorCollection action_collection;
action_collection actions_hirep;
typedef typename AccessTypes<Action, Repr>::FieldTypeCollection action_hirep_types;
std::vector<ActPtr>& actions;
// Temporary conversion between ActionLevel and ActionLevelHirep
//ActionLevelHirep(ActionLevel<GaugeField>& AL ):actions(AL.actions), multiplier(AL.multiplier){}
ActionLevel(unsigned int mul = 1) : actions(std::get<0>(actions_hirep)), multiplier(mul) {
// initialize the hirep vectors to zero.
//apply(this->resize, actions_hirep, 0); //need a working resize
assert(mul >= 1);
};
//void push_back(ActPtr ptr) { actions.push_back(ptr); }
template < class Field >
void push_back(Action<Field>* ptr) {
// insert only in the correct vector
std::get< Index < Field, action_hirep_types>::value >(actions_hirep).push_back(ptr);
};
template < class ActPtr>
static void resize(ActPtr ap, unsigned int n){
ap->resize(n);
}
//template <std::size_t I>
//auto getRepresentation(Repr& R)->decltype(std::get<I>(R).U) {return std::get<I>(R).U;}
// Loop on tuple for a callable function
template <std::size_t I = 1, typename Callable, typename ...Args>
inline typename std::enable_if<I == std::tuple_size<action_collection>::value, void>::type apply(
Callable, Repr& R,Args&...) const {}
template <std::size_t I = 1, typename Callable, typename ...Args>
inline typename std::enable_if<I < std::tuple_size<action_collection>::value, void>::type apply(
Callable fn, Repr& R, Args&... arguments) const {
fn(std::get<I>(actions_hirep), std::get<I>(R.rep), arguments...);
apply<I + 1>(fn, R, arguments...);
}
};
//template <class GaugeField>
//using ActionSet = std::vector<ActionLevel<GaugeField> >;
template <class GaugeField, class R>
using ActionSet = std::vector<ActionLevel<GaugeField, R> >;
}
}
#endif
#endif // ACTION_BASE_H

View File

@ -31,15 +31,31 @@ directory
#define QCD_ACTION_CORE
#include <Grid/qcd/action/ActionBase.h>
#include <Grid/qcd/action/ActionSet.h>
#include <Grid/qcd/action/ActionParams.h>
////////////////////////////////////////////
// Gauge Actions
////////////////////////////////////////////
#include <Grid/qcd/action/gauge/Gauge.h>
////////////////////////////////////////////
// Fermion prereqs
////////////////////////////////////////////
#include <Grid/qcd/action/fermion/FermionCore.h>
////////////////////////////////////////////
// Scalar Actions
////////////////////////////////////////////
#include <Grid/qcd/action/scalar/Scalar.h>
////////////////////////////////////////////
// Utility functions
////////////////////////////////////////////
#include <Grid/qcd/utils/Metric.h>
#include <Grid/qcd/utils/CovariantLaplacian.h>
#endif

View File

@ -1,67 +1,92 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/ActionParams.h
Source file: ./lib/qcd/action/ActionParams.h
Copyright (C) 2015
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_ACTION_PARAMS_H
#define GRID_QCD_ACTION_PARAMS_H
namespace Grid {
namespace QCD {
// These can move into a params header and be given MacroMagic serialisation
struct GparityWilsonImplParams {
bool overlapCommsCompute;
std::vector<int> twists;
GparityWilsonImplParams () : twists(Nd,0), overlapCommsCompute(false) {};
// These can move into a params header and be given MacroMagic serialisation
struct GparityWilsonImplParams {
bool overlapCommsCompute;
std::vector<int> twists;
GparityWilsonImplParams() : twists(Nd, 0), overlapCommsCompute(false){};
};
struct WilsonImplParams {
bool overlapCommsCompute;
std::vector<Complex> boundary_phases;
WilsonImplParams() : overlapCommsCompute(false) {
boundary_phases.resize(Nd, 1.0);
};
WilsonImplParams(const std::vector<Complex> phi)
: boundary_phases(phi), overlapCommsCompute(false) {}
};
struct WilsonImplParams {
bool overlapCommsCompute;
WilsonImplParams() : overlapCommsCompute(false) {};
};
struct StaggeredImplParams {
StaggeredImplParams() {};
};
struct OneFlavourRationalParams : Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(OneFlavourRationalParams,
RealD, lo,
RealD, hi,
int, MaxIter,
RealD, tolerance,
int, degree,
int, precision);
// MaxIter and tolerance, vectors??
// constructor
OneFlavourRationalParams( RealD _lo = 0.0,
RealD _hi = 1.0,
int _maxit = 1000,
RealD tol = 1.0e-8,
int _degree = 10,
int _precision = 64)
: lo(_lo),
hi(_hi),
MaxIter(_maxit),
tolerance(tol),
degree(_degree),
precision(_precision){};
};
}
}
struct StaggeredImplParams {
StaggeredImplParams() {};
};
struct OneFlavourRationalParams {
RealD lo;
RealD hi;
int MaxIter; // Vector?
RealD tolerance; // Vector?
int degree=10;
int precision=64;
OneFlavourRationalParams (RealD _lo,RealD _hi,int _maxit,RealD tol=1.0e-8,int _degree = 10,int _precision=64) :
lo(_lo), hi(_hi), MaxIter(_maxit), tolerance(tol), degree(_degree), precision(_precision)
{};
};
}}
#endif

116
lib/qcd/action/ActionSet.h Normal file
View File

@ -0,0 +1,116 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/ActionSet.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef ACTION_SET_H
#define ACTION_SET_H
namespace Grid {
// Should drop this namespace here
namespace QCD {
//////////////////////////////////
// Indexing of tuple types
//////////////////////////////////
template <class T, class Tuple>
struct Index;
template <class T, class... Types>
struct Index<T, std::tuple<T, Types...>> {
static const std::size_t value = 0;
};
template <class T, class U, class... Types>
struct Index<T, std::tuple<U, Types...>> {
static const std::size_t value = 1 + Index<T, std::tuple<Types...>>::value;
};
////////////////////////////////////////////
// Action Level
// Action collection
// in a integration level
// (for multilevel integration schemes)
////////////////////////////////////////////
template <class Field, class Repr = NoHirep >
struct ActionLevel {
public:
unsigned int multiplier;
// Fundamental repr actions separated because of the smearing
typedef Action<Field>* ActPtr;
// construct a tuple of vectors of the actions for the corresponding higher
// representation fields
typedef typename AccessTypes<Action, Repr>::VectorCollection action_collection;
typedef typename AccessTypes<Action, Repr>::FieldTypeCollection action_hirep_types;
action_collection actions_hirep;
std::vector<ActPtr>& actions;
explicit ActionLevel(unsigned int mul = 1) :
actions(std::get<0>(actions_hirep)), multiplier(mul) {
// initialize the hirep vectors to zero.
// apply(this->resize, actions_hirep, 0); //need a working resize
assert(mul >= 1);
}
template < class GenField >
void push_back(Action<GenField>* ptr) {
// insert only in the correct vector
std::get< Index < GenField, action_hirep_types>::value >(actions_hirep).push_back(ptr);
};
template <class ActPtr>
static void resize(ActPtr ap, unsigned int n) {
ap->resize(n);
}
// Loop on tuple for a callable function
template <std::size_t I = 1, typename Callable, typename ...Args>
inline typename std::enable_if<I == std::tuple_size<action_collection>::value, void>::type apply(Callable, Repr& R,Args&...) const {}
template <std::size_t I = 1, typename Callable, typename ...Args>
inline typename std::enable_if<I < std::tuple_size<action_collection>::value, void>::type apply(Callable fn, Repr& R, Args&... arguments) const {
fn(std::get<I>(actions_hirep), std::get<I>(R.rep), arguments...);
apply<I + 1>(fn, R, arguments...);
}
};
// Define the ActionSet
template <class GaugeField, class R>
using ActionSet = std::vector<ActionLevel<GaugeField, R> >;
} // QCD
} // Grid
#endif // ACTION_SET_H

View File

@ -29,7 +29,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
*************************************************************************************/
/* END LEGAL */
#include <Grid/Eigen/Dense>
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
@ -320,7 +320,7 @@ void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const
this->DhopDeriv(mat,U,Din,dag);
} else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
MeooeDag5D(U,Din);
Meooe5D(U,Din);
this->DhopDeriv(mat,Din,V,dag);
}
};
@ -335,8 +335,8 @@ void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const
this->DhopDerivOE(mat,U,Din,dag);
} else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
MeooeDag5D(U,Din);
this->DhopDerivOE(mat,Din,V,dag);
Meooe5D(U,Din);
this->DhopDerivOE(mat,Din,V,dag);
}
};
template<class Impl>
@ -350,7 +350,7 @@ void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const
this->DhopDerivEO(mat,U,Din,dag);
} else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
MeooeDag5D(U,Din);
Meooe5D(U,Din);
this->DhopDerivEO(mat,Din,V,dag);
}
};
@ -380,6 +380,8 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
///////////////////////////////////////////////////////////
// The Cayley coeffs (unprec)
///////////////////////////////////////////////////////////
assert(gamma.size()==Ls);
omega.resize(Ls);
bs.resize(Ls);
cs.resize(Ls);
@ -412,10 +414,11 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
for(int i=0; i < Ls; i++){
as[i] = 1.0;
omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code
// assert(fabs(omega[i])>0.0);
bs[i] = 0.5*(bpc/omega[i] + bmc);
cs[i] = 0.5*(bpc/omega[i] - bmc);
}
////////////////////////////////////////////////////////
// Constants for the preconditioned matrix Cayley form
////////////////////////////////////////////////////////
@ -425,12 +428,12 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
ceo.resize(Ls);
for(int i=0;i<Ls;i++){
bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);
bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);
// assert(fabs(bee[i])>0.0);
cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5));
beo[i]=as[i]*bs[i];
ceo[i]=-as[i]*cs[i];
}
aee.resize(Ls);
aeo.resize(Ls);
for(int i=0;i<Ls;i++){
@ -474,14 +477,16 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
{
Coeff_t delta_d=mass*cee[Ls-1];
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
for(int j=0;j<Ls-1;j++) {
// assert(fabs(bee[j])>0.0);
delta_d *= cee[j]/bee[j];
}
dee[Ls-1] += delta_d;
}
int inv=1;
this->MooeeInternalCompute(0,inv,MatpInv,MatmInv);
this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag);
}
@ -495,7 +500,9 @@ void CayleyFermion5D<Impl>::MooeeInternalCompute(int dag, int inv,
GridBase *grid = this->FermionRedBlackGrid();
int LLs = grid->_rdimensions[0];
if ( LLs == Ls ) return; // Not vectorised in 5th direction
if ( LLs == Ls ) {
return; // Not vectorised in 5th direction
}
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);

View File

@ -237,6 +237,13 @@ void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &
INSTANTIATE_DPERP(GparityWilsonImplD);
INSTANTIATE_DPERP(ZWilsonImplF);
INSTANTIATE_DPERP(ZWilsonImplD);
INSTANTIATE_DPERP(WilsonImplFH);
INSTANTIATE_DPERP(WilsonImplDF);
INSTANTIATE_DPERP(GparityWilsonImplFH);
INSTANTIATE_DPERP(GparityWilsonImplDF);
INSTANTIATE_DPERP(ZWilsonImplFH);
INSTANTIATE_DPERP(ZWilsonImplDF);
#endif
}}

View File

@ -29,7 +29,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
*************************************************************************************/
/* END LEGAL */
#include <Grid/Eigen/Dense>
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
@ -137,6 +137,20 @@ template void CayleyFermion5D<WilsonImplF>::MooeeInternal(const FermionField &ps
template void CayleyFermion5D<WilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZWilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZWilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
INSTANTIATE_DPERP(GparityWilsonImplFH);
INSTANTIATE_DPERP(GparityWilsonImplDF);
INSTANTIATE_DPERP(WilsonImplFH);
INSTANTIATE_DPERP(WilsonImplDF);
INSTANTIATE_DPERP(ZWilsonImplFH);
INSTANTIATE_DPERP(ZWilsonImplDF);
template void CayleyFermion5D<GparityWilsonImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<GparityWilsonImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<WilsonImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<WilsonImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZWilsonImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZWilsonImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
#endif
}}

View File

@ -37,7 +37,6 @@ namespace Grid {
namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus fowards
// Pplus backwards
template<class Impl>
@ -152,6 +151,13 @@ void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &
INSTANTIATE_DPERP(GparityWilsonImplD);
INSTANTIATE_DPERP(ZWilsonImplF);
INSTANTIATE_DPERP(ZWilsonImplD);
INSTANTIATE_DPERP(WilsonImplFH);
INSTANTIATE_DPERP(WilsonImplDF);
INSTANTIATE_DPERP(GparityWilsonImplFH);
INSTANTIATE_DPERP(GparityWilsonImplDF);
INSTANTIATE_DPERP(ZWilsonImplFH);
INSTANTIATE_DPERP(ZWilsonImplDF);
#endif
}

View File

@ -808,10 +808,21 @@ INSTANTIATE_DPERP(DomainWallVec5dImplF);
INSTANTIATE_DPERP(ZDomainWallVec5dImplD);
INSTANTIATE_DPERP(ZDomainWallVec5dImplF);
INSTANTIATE_DPERP(DomainWallVec5dImplDF);
INSTANTIATE_DPERP(DomainWallVec5dImplFH);
INSTANTIATE_DPERP(ZDomainWallVec5dImplDF);
INSTANTIATE_DPERP(ZDomainWallVec5dImplFH);
template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<DomainWallVec5dImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<DomainWallVec5dImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
}}

View File

@ -68,7 +68,7 @@ namespace Grid {
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
assert(zdata->n==this->Ls);
// std::cout<<GridLogMessage << "DomainWallFermion with Ls="<<this->Ls<<std::endl;
std::cout<<GridLogMessage << "DomainWallFermion with Ls="<<this->Ls<<std::endl;
// Call base setter
this->SetCoefficientsTanh(zdata,1.0,0.0);

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -58,6 +58,7 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
#include <Grid/qcd/action/fermion/MobiusFermion.h>
#include <Grid/qcd/action/fermion/ZMobiusFermion.h>
#include <Grid/qcd/action/fermion/SchurDiagTwoKappa.h>
#include <Grid/qcd/action/fermion/ScaledShamirFermion.h>
#include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h>
#include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h>
@ -88,6 +89,10 @@ typedef WilsonFermion<WilsonImplR> WilsonFermionR;
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
typedef WilsonFermion<WilsonImplRL> WilsonFermionRL;
typedef WilsonFermion<WilsonImplFH> WilsonFermionFH;
typedef WilsonFermion<WilsonImplDF> WilsonFermionDF;
typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR;
typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF;
typedef WilsonFermion<WilsonAdjImplD> WilsonAdjFermionD;
@ -104,27 +109,50 @@ typedef DomainWallFermion<WilsonImplR> DomainWallFermionR;
typedef DomainWallFermion<WilsonImplF> DomainWallFermionF;
typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL;
typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH;
typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF;
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
typedef MobiusFermion<WilsonImplRL> MobiusFermionRL;
typedef MobiusFermion<WilsonImplFH> MobiusFermionFH;
typedef MobiusFermion<WilsonImplDF> MobiusFermionDF;
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
typedef ZMobiusFermion<ZWilsonImplRL> ZMobiusFermionRL;
typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH;
typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF;
// Ls vectorised
typedef DomainWallFermion<DomainWallVec5dImplR> DomainWallFermionVec5dR;
typedef DomainWallFermion<DomainWallVec5dImplF> DomainWallFermionVec5dF;
typedef DomainWallFermion<DomainWallVec5dImplD> DomainWallFermionVec5dD;
typedef DomainWallFermion<DomainWallVec5dImplRL> DomainWallFermionVec5dRL;
typedef DomainWallFermion<DomainWallVec5dImplFH> DomainWallFermionVec5dFH;
typedef DomainWallFermion<DomainWallVec5dImplDF> DomainWallFermionVec5dDF;
typedef MobiusFermion<DomainWallVec5dImplR> MobiusFermionVec5dR;
typedef MobiusFermion<DomainWallVec5dImplF> MobiusFermionVec5dF;
typedef MobiusFermion<DomainWallVec5dImplD> MobiusFermionVec5dD;
typedef MobiusFermion<DomainWallVec5dImplRL> MobiusFermionVec5dRL;
typedef MobiusFermion<DomainWallVec5dImplFH> MobiusFermionVec5dFH;
typedef MobiusFermion<DomainWallVec5dImplDF> MobiusFermionVec5dDF;
typedef ZMobiusFermion<ZDomainWallVec5dImplR> ZMobiusFermionVec5dR;
typedef ZMobiusFermion<ZDomainWallVec5dImplF> ZMobiusFermionVec5dF;
typedef ZMobiusFermion<ZDomainWallVec5dImplD> ZMobiusFermionVec5dD;
typedef ZMobiusFermion<ZDomainWallVec5dImplRL> ZMobiusFermionVec5dRL;
typedef ZMobiusFermion<ZDomainWallVec5dImplFH> ZMobiusFermionVec5dFH;
typedef ZMobiusFermion<ZDomainWallVec5dImplDF> ZMobiusFermionVec5dDF;
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
@ -165,17 +193,35 @@ typedef OverlapWilsonPartialFractionZolotarevFermion<WilsonImplD> OverlapWilsonP
typedef WilsonFermion<GparityWilsonImplR> GparityWilsonFermionR;
typedef WilsonFermion<GparityWilsonImplF> GparityWilsonFermionF;
typedef WilsonFermion<GparityWilsonImplD> GparityWilsonFermionD;
typedef WilsonFermion<GparityWilsonImplRL> GparityWilsonFermionRL;
typedef WilsonFermion<GparityWilsonImplFH> GparityWilsonFermionFH;
typedef WilsonFermion<GparityWilsonImplDF> GparityWilsonFermionDF;
typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR;
typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF;
typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD;
typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL;
typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH;
typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF;
typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR;
typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF;
typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD;
typedef WilsonTMFermion<GparityWilsonImplRL> GparityWilsonTMFermionRL;
typedef WilsonTMFermion<GparityWilsonImplFH> GparityWilsonTMFermionFH;
typedef WilsonTMFermion<GparityWilsonImplDF> GparityWilsonTMFermionDF;
typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR;
typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF;
typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD;
typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL;
typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH;
typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF;
typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR;
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;
typedef ImprovedStaggeredFermion<StaggeredImplD> ImprovedStaggeredFermionD;

View File

@ -55,7 +55,14 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
template class A<ZWilsonImplF>; \
template class A<ZWilsonImplD>; \
template class A<GparityWilsonImplF>; \
template class A<GparityWilsonImplD>;
template class A<GparityWilsonImplD>; \
template class A<WilsonImplFH>; \
template class A<WilsonImplDF>; \
template class A<ZWilsonImplFH>; \
template class A<ZWilsonImplDF>; \
template class A<GparityWilsonImplFH>; \
template class A<GparityWilsonImplDF>;
#define AdjointFermOpTemplateInstantiate(A) \
template class A<WilsonAdjImplF>; \
@ -69,7 +76,11 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
template class A<DomainWallVec5dImplF>; \
template class A<DomainWallVec5dImplD>; \
template class A<ZDomainWallVec5dImplF>; \
template class A<ZDomainWallVec5dImplD>;
template class A<ZDomainWallVec5dImplD>; \
template class A<DomainWallVec5dImplFH>; \
template class A<DomainWallVec5dImplDF>; \
template class A<ZDomainWallVec5dImplFH>; \
template class A<ZDomainWallVec5dImplDF>;
#define FermOpTemplateInstantiate(A) \
FermOp4dVecTemplateInstantiate(A) \

View File

@ -35,7 +35,6 @@ directory
namespace Grid {
namespace QCD {
//////////////////////////////////////////////
// Template parameter class constructs to package
// externally control Fermion implementations
@ -44,7 +43,7 @@ namespace QCD {
// Ultimately need Impl to always define types where XXX is opaque
//
// typedef typename XXX Simd;
// typedef typename XXX GaugeLinkField;
// typedef typename XXX GaugeLinkField;
// typedef typename XXX GaugeField;
// typedef typename XXX GaugeActField;
// typedef typename XXX FermionField;
@ -89,7 +88,53 @@ namespace QCD {
//
// }
//////////////////////////////////////////////
template <class T> struct SamePrecisionMapper {
typedef T HigherPrecVector ;
typedef T LowerPrecVector ;
};
template <class T> struct LowerPrecisionMapper { };
template <> struct LowerPrecisionMapper<vRealF> {
typedef vRealF HigherPrecVector ;
typedef vRealH LowerPrecVector ;
};
template <> struct LowerPrecisionMapper<vRealD> {
typedef vRealD HigherPrecVector ;
typedef vRealF LowerPrecVector ;
};
template <> struct LowerPrecisionMapper<vComplexF> {
typedef vComplexF HigherPrecVector ;
typedef vComplexH LowerPrecVector ;
};
template <> struct LowerPrecisionMapper<vComplexD> {
typedef vComplexD HigherPrecVector ;
typedef vComplexF LowerPrecVector ;
};
struct CoeffReal {
public:
typedef RealD _Coeff_t;
static const int Nhcs = 2;
template<class Simd> using PrecisionMapper = SamePrecisionMapper<Simd>;
};
struct CoeffRealHalfComms {
public:
typedef RealD _Coeff_t;
static const int Nhcs = 1;
template<class Simd> using PrecisionMapper = LowerPrecisionMapper<Simd>;
};
struct CoeffComplex {
public:
typedef ComplexD _Coeff_t;
static const int Nhcs = 2;
template<class Simd> using PrecisionMapper = SamePrecisionMapper<Simd>;
};
struct CoeffComplexHalfComms {
public:
typedef ComplexD _Coeff_t;
static const int Nhcs = 1;
template<class Simd> using PrecisionMapper = LowerPrecisionMapper<Simd>;
};
////////////////////////////////////////////////////////////////////////
// Implementation dependent fermion types
@ -108,58 +153,63 @@ namespace QCD {
typedef typename Impl::Coeff_t Coeff_t; \
#define INHERIT_IMPL_TYPES(Base) \
INHERIT_GIMPL_TYPES(Base) \
INHERIT_GIMPL_TYPES(Base) \
INHERIT_FIMPL_TYPES(Base)
/////////////////////////////////////////////////////////////////////////////
// Single flavour four spinors with colour index
/////////////////////////////////////////////////////////////////////////////
template <class S, class Representation = FundamentalRepresentation,class _Coeff_t = RealD >
template <class S, class Representation = FundamentalRepresentation,class Options = CoeffReal >
class WilsonImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
public:
static const int Dimension = Representation::Dimension;
static const bool LsVectorised=false;
static const int Nhcs = Options::Nhcs;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl);
//Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
const bool LsVectorised=false;
typedef _Coeff_t Coeff_t;
INHERIT_GIMPL_TYPES(Gimpl);
typedef typename Options::_Coeff_t Coeff_t;
typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL;
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
template <typename vtype> using iImplPropagator = iScalar<iMatrix<iMatrix<vtype, Dimension>, Ns> >;
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
template <typename vtype> using iImplHalfCommSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhcs> >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplPropagator<Simd> SitePropagator;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplHalfCommSpinor<SimdL> SiteHalfCommSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SitePropagator> PropagatorField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonCompressor<SiteHalfCommSpinor,SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonImplParams ImplParams;
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
ImplParams Params;
WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){
assert(Params.boundary_phases.size() == Nd);
};
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
inline void multLink(SiteHalfSpinor &phi,
const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi,
int mu,
StencilEntry *SE,
StencilImpl &St) {
const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi,
int mu,
StencilEntry *SE,
StencilImpl &St) {
mult(&phi(), &U(mu), &chi());
}
@ -169,16 +219,34 @@ namespace QCD {
}
inline void DoubleStore(GridBase *GaugeGrid,
DoubledGaugeField &Uds,
const GaugeField &Umu) {
DoubledGaugeField &Uds,
const GaugeField &Umu)
{
typedef typename Simd::scalar_type scalar_type;
conformable(Uds._grid, GaugeGrid);
conformable(Umu._grid, GaugeGrid);
GaugeLinkField U(GaugeGrid);
GaugeLinkField tmp(GaugeGrid);
Lattice<iScalar<vInteger> > coor(GaugeGrid);
for (int mu = 0; mu < Nd; mu++) {
U = PeekIndex<LorentzIndex>(Umu, mu);
PokeIndex<LorentzIndex>(Uds, U, mu);
U = adj(Cshift(U, mu, -1));
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
auto pha = Params.boundary_phases[mu];
scalar_type phase( real(pha),imag(pha) );
int Lmu = GaugeGrid->GlobalDimensions()[mu] - 1;
LatticeCoordinate(coor, mu);
U = PeekIndex<LorentzIndex>(Umu, mu);
tmp = where(coor == Lmu, phase * U, U);
PokeIndex<LorentzIndex>(Uds, tmp, mu);
U = adj(Cshift(U, mu, -1));
U = where(coor == 0, conjugate(phase) * U, U);
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
}
}
@ -195,11 +263,11 @@ namespace QCD {
tmp = zero;
parallel_for(int sss=0;sss<tmp._grid->oSites();sss++){
int sU=sss;
for(int s=0;s<Ls;s++){
int sF = s+Ls*sU;
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
}
int sU=sss;
for(int s=0;s<Ls;s++){
int sF = s+Ls*sU;
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
}
}
PokeIndex<LorentzIndex>(mat,tmp,mu);
@ -209,31 +277,34 @@ namespace QCD {
////////////////////////////////////////////////////////////////////////////////////
// Single flavour four spinors with colour index, 5d redblack
////////////////////////////////////////////////////////////////////////////////////
template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
template<class S,int Nrepresentation=Nc, class Options=CoeffReal>
class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
public:
static const int Dimension = Nrepresentation;
const bool LsVectorised=true;
typedef _Coeff_t Coeff_t;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl);
static const int Dimension = Nrepresentation;
static const bool LsVectorised=true;
static const int Nhcs = Options::Nhcs;
typedef typename Options::_Coeff_t Coeff_t;
typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL;
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
template <typename vtype> using iImplPropagator = iScalar<iMatrix<iMatrix<vtype, Nrepresentation>, Ns> >;
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
template <typename vtype> using iImplHalfCommSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhcs> >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>;
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplPropagator<Simd> SitePropagator;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SitePropagator> PropagatorField;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplPropagator<Simd> SitePropagator;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplHalfCommSpinor<SimdL> SiteHalfCommSpinor;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SitePropagator> PropagatorField;
/////////////////////////////////////////////////
// Make the doubled gauge field a *scalar*
@ -241,9 +312,9 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar
typedef iImplGaugeLink<typename Simd::scalar_type> SiteScalarGaugeLink; // scalar
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonCompressor<SiteHalfCommSpinor,SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonImplParams ImplParams;
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
@ -259,12 +330,12 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
}
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
StencilImpl &St) {
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
StencilImpl &St) {
SiteGaugeLink UU;
for (int i = 0; i < Nrepresentation; i++) {
for (int j = 0; j < Nrepresentation; j++) {
vsplat(UU()()(i, j), U(mu)()(i, j));
vsplat(UU()()(i, j), U(mu)()(i, j));
}
}
mult(&phi(), &UU(), &chi());
@ -301,45 +372,90 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
{
assert(0);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,FermionField &Atilde, int mu)
{
assert(0);
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
assert(0);
// Following lines to be revised after Peter's addition of half prec
// missing put lane...
/*
typedef decltype(traceIndex<SpinIndex>(outerProduct(Btilde[0], Atilde[0]))) result_type;
unsigned int LLs = Btilde._grid->_rdimensions[0];
conformable(Atilde._grid,Btilde._grid);
GridBase* grid = mat._grid;
GridBase* Bgrid = Btilde._grid;
unsigned int dimU = grid->Nd();
unsigned int dimF = Bgrid->Nd();
GaugeLinkField tmp(grid);
tmp = zero;
// FIXME
// Current implementation works, thread safe, probably suboptimal
// Passing through the local coordinate for grid transformation
// the force grid is in general very different from the Ls vectorized grid
PARALLEL_FOR_LOOP
for (int so = 0; so < grid->oSites(); so++) {
std::vector<typename result_type::scalar_object> vres(Bgrid->Nsimd());
std::vector<int> ocoor; grid->oCoorFromOindex(ocoor,so);
for (int si = 0; si < tmp._grid->iSites(); si++){
typename result_type::scalar_object scalar_object; scalar_object = zero;
std::vector<int> local_coor;
std::vector<int> icoor; grid->iCoorFromIindex(icoor,si);
grid->InOutCoorToLocalCoor(ocoor, icoor, local_coor);
for (int s = 0; s < LLs; s++) {
std::vector<int> slocal_coor(dimF);
slocal_coor[0] = s;
for (int s4d = 1; s4d< dimF; s4d++) slocal_coor[s4d] = local_coor[s4d-1];
int sF = Bgrid->oIndexReduced(slocal_coor);
assert(sF < Bgrid->oSites());
extract(traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF])), vres);
// sum across the 5d dimension
for (auto v : vres) scalar_object += v;
}
tmp._odata[so].putlane(scalar_object, si);
}
}
PokeIndex<LorentzIndex>(mat, tmp, mu);
*/
}
};
////////////////////////////////////////////////////////////////////////////////////////
// Flavour doubled spinors; is Gparity the only? what about C*?
////////////////////////////////////////////////////////////////////////////////////////
template <class S, int Nrepresentation,class _Coeff_t = RealD>
template <class S, int Nrepresentation, class Options=CoeffReal>
class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
public:
static const int Dimension = Nrepresentation;
static const int Nhcs = Options::Nhcs;
static const bool LsVectorised=false;
const bool LsVectorised=false;
typedef _Coeff_t Coeff_t;
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl);
typedef typename Options::_Coeff_t Coeff_t;
typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL;
template <typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp>;
template <typename vtype> using iImplPropagator = iVector<iMatrix<iMatrix<vtype, Nrepresentation>, Ns>, Ngp >;
template <typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>;
template <typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp>;
template <typename vtype> using iImplPropagator = iVector<iMatrix<iMatrix<vtype, Nrepresentation>, Ns>, Ngp>;
template <typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>;
template <typename vtype> using iImplHalfCommSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhcs>, Ngp>;
template <typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>, Ngp>;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplPropagator<Simd> SitePropagator;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplPropagator<Simd> SitePropagator;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplHalfCommSpinor<SimdL> SiteHalfCommSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SitePropagator> PropagatorField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonCompressor<SiteHalfCommSpinor,SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
typedef GparityWilsonImplParams ImplParams;
@ -353,19 +469,19 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
// provide the multiply by link that is differentiated between Gparity (with
// flavour index) and non-Gparity
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
StencilImpl &St) {
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
StencilImpl &St) {
typedef SiteHalfSpinor vobj;
typedef typename SiteHalfSpinor::scalar_object sobj;
typedef SiteHalfSpinor vobj;
typedef typename SiteHalfSpinor::scalar_object sobj;
vobj vtmp;
sobj stmp;
GridBase *grid = St._grid;
const int Nsimd = grid->Nsimd();
int direction = St._directions[mu];
int distance = St._distances[mu];
int ptype = St._permute_type[mu];
@ -373,13 +489,13 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
// Fixme X.Y.Z.T hardcode in stencil
int mmu = mu % Nd;
// assert our assumptions
assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code
assert((sl == 1) || (sl == 2));
std::vector<int> icoor;
if ( SE->_around_the_world && Params.twists[mmu] ) {
if ( sl == 2 ) {
@ -389,25 +505,25 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
extract(chi,vals);
for(int s=0;s<Nsimd;s++){
grid->iCoorFromIindex(icoor,s);
assert((icoor[direction]==0)||(icoor[direction]==1));
int permute_lane;
if ( distance == 1) {
permute_lane = icoor[direction]?1:0;
} else {
permute_lane = icoor[direction]?0:1;
}
if ( permute_lane ) {
stmp(0) = vals[s](1);
stmp(1) = vals[s](0);
vals[s] = stmp;
}
grid->iCoorFromIindex(icoor,s);
assert((icoor[direction]==0)||(icoor[direction]==1));
int permute_lane;
if ( distance == 1) {
permute_lane = icoor[direction]?1:0;
} else {
permute_lane = icoor[direction]?0:1;
}
if ( permute_lane ) {
stmp(0) = vals[s](1);
stmp(1) = vals[s](0);
vals[s] = stmp;
}
}
merge(vtmp,vals);
} else {
vtmp(0) = chi(1);
vtmp(1) = chi(0);
@ -432,11 +548,11 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
GaugeLinkField Uconj(GaugeGrid);
Lattice<iScalar<vInteger> > coor(GaugeGrid);
for(int mu=0;mu<Nd;mu++){
LatticeCoordinate(coor,mu);
U = PeekIndex<LorentzIndex>(Umu,mu);
Uconj = conjugate(U);
@ -450,7 +566,7 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
Uds[ss](0)(mu) = U[ss]();
Uds[ss](1)(mu) = Uconj[ss]();
}
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
Uconj = adj(Cshift(Uconj,mu,-1));
@ -458,11 +574,12 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
if ( Params.twists[mu] ) {
Utmp = where(coor==0,Uconj,Utmp);
}
parallel_for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](0)(mu+4) = Utmp[ss]();
}
Utmp = Uconj;
if ( Params.twists[mu] ) {
Utmp = where(coor==0,U,Utmp);
@ -471,11 +588,10 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
parallel_for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](1)(mu+4) = Utmp[ss]();
}
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) {
// DhopDir provides U or Uconj depending on coor/flavour.
@ -483,7 +599,7 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
// use lorentz for flavour as hack.
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
parallel_for(auto ss = tmp.begin(); ss < tmp.end(); ss++) {
link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1));
link[ss]() = tmp[ss](0, 0) + conjugate(tmp[ss](1, 1));
}
PokeIndex<LorentzIndex>(mat, link, mu);
return;
@ -492,7 +608,7 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
int Ls = Btilde._grid->_fdimensions[0];
GaugeLinkField tmp(mat._grid);
tmp = zero;
parallel_for(int ss = 0; ss < tmp._grid->oSites(); ss++) {
@ -508,23 +624,22 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
};
/////////////////////////////////////////////////////////////////////////////
// Single flavour one component spinors with colour index
/////////////////////////////////////////////////////////////////////////////
template <class S, class Representation = FundamentalRepresentation >
class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
/////////////////////////////////////////////////////////////////////////////
// Single flavour one component spinors with colour index
/////////////////////////////////////////////////////////////////////////////
template <class S, class Representation = FundamentalRepresentation >
class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
public:
typedef RealD _Coeff_t ;
static const int Dimension = Representation::Dimension;
static const bool LsVectorised=false;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
//Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
const bool LsVectorised=false;
typedef _Coeff_t Coeff_t;
INHERIT_GIMPL_TYPES(Gimpl);
@ -641,8 +756,6 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
}
};
/////////////////////////////////////////////////////////////////////////////
// Single flavour one component spinors with colour index. 5d vec
/////////////////////////////////////////////////////////////////////////////
@ -651,16 +764,14 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
public:
typedef RealD _Coeff_t ;
static const int Dimension = Representation::Dimension;
static const bool LsVectorised=true;
typedef RealD Coeff_t ;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
//Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
const bool LsVectorised=true;
typedef _Coeff_t Coeff_t;
INHERIT_GIMPL_TYPES(Gimpl);
@ -823,43 +934,61 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
}
};
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffReal > WilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation > WilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, ComplexD > ZWilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, ComplexD > ZWilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, ComplexD > ZWilsonImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double
typedef WilsonImpl<vComplex, AdjointRepresentation > WilsonAdjImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, AdjointRepresentation > WilsonAdjImplF; // Float
typedef WilsonImpl<vComplexD, AdjointRepresentation > WilsonAdjImplD; // Double
typedef WilsonImpl<vComplex, AdjointRepresentation, CoeffReal > WilsonAdjImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF; // Float
typedef WilsonImpl<vComplexD, AdjointRepresentation, CoeffReal > WilsonAdjImplD; // Double
typedef WilsonImpl<vComplex, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplF; // Float
typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplD; // Double
typedef WilsonImpl<vComplex, TwoIndexSymmetricRepresentation, CoeffReal > WilsonTwoIndexSymmetricImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation, CoeffReal > WilsonTwoIndexSymmetricImplF; // Float
typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation, CoeffReal > WilsonTwoIndexSymmetricImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc, CoeffReal> DomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc, CoeffReal> DomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc, CoeffReal> DomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc, CoeffRealHalfComms> DomainWallVec5dImplRL; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc, CoeffRealHalfComms> DomainWallVec5dImplFH; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc, CoeffRealHalfComms> DomainWallVec5dImplDF; // Double
typedef GparityWilsonImpl<vComplex , Nc> GparityWilsonImplR; // Real.. whichever prec
typedef GparityWilsonImpl<vComplexF, Nc> GparityWilsonImplF; // Float
typedef GparityWilsonImpl<vComplexD, Nc> GparityWilsonImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc,CoeffComplex> ZDomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc,CoeffComplex> ZDomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc,CoeffComplex> ZDomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc,CoeffComplexHalfComms> ZDomainWallVec5dImplRL; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc,CoeffComplexHalfComms> ZDomainWallVec5dImplFH; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc,CoeffComplexHalfComms> ZDomainWallVec5dImplDF; // Double
typedef GparityWilsonImpl<vComplex , Nc,CoeffReal> GparityWilsonImplR; // Real.. whichever prec
typedef GparityWilsonImpl<vComplexF, Nc,CoeffReal> GparityWilsonImplF; // Float
typedef GparityWilsonImpl<vComplexD, Nc,CoeffReal> GparityWilsonImplD; // Double
typedef GparityWilsonImpl<vComplex , Nc,CoeffRealHalfComms> GparityWilsonImplRL; // Real.. whichever prec
typedef GparityWilsonImpl<vComplexF, Nc,CoeffRealHalfComms> GparityWilsonImplFH; // Float
typedef GparityWilsonImpl<vComplexD, Nc,CoeffRealHalfComms> GparityWilsonImplDF; // Double
typedef StaggeredImpl<vComplex, FundamentalRepresentation > StaggeredImplR; // Real.. whichever prec
typedef StaggeredImpl<vComplexF, FundamentalRepresentation > StaggeredImplF; // Float
typedef StaggeredImpl<vComplexD, FundamentalRepresentation > StaggeredImplD; // Double
typedef StaggeredImpl<vComplex, FundamentalRepresentation > StaggeredImplR; // Real.. whichever prec
typedef StaggeredImpl<vComplexF, FundamentalRepresentation > StaggeredImplF; // Float
typedef StaggeredImpl<vComplexD, FundamentalRepresentation > StaggeredImplD; // Double
typedef StaggeredVec5dImpl<vComplex, FundamentalRepresentation > StaggeredVec5dImplR; // Real.. whichever prec
typedef StaggeredVec5dImpl<vComplexF, FundamentalRepresentation > StaggeredVec5dImplF; // Float
typedef StaggeredVec5dImpl<vComplexD, FundamentalRepresentation > StaggeredVec5dImplD; // Double
typedef StaggeredVec5dImpl<vComplex, FundamentalRepresentation > StaggeredVec5dImplR; // Real.. whichever prec
typedef StaggeredVec5dImpl<vComplexF, FundamentalRepresentation > StaggeredVec5dImplF; // Float
typedef StaggeredVec5dImpl<vComplexD, FundamentalRepresentation > StaggeredVec5dImplD; // Double
}}

View File

@ -160,8 +160,6 @@ void ImprovedStaggeredFermion<Impl>::ImportGauge(const GaugeField &_Uthin,const
PokeIndex<LorentzIndex>(UUUmu, U*(-0.5*c2/u0/u0/u0), mu+4);
}
std::cout << " Umu " << Umu._odata[0]<<std::endl;
std::cout << " UUUmu " << UUUmu._odata[0]<<std::endl;
pickCheckerboard(Even, UmuEven, Umu);
pickCheckerboard(Odd, UmuOdd , Umu);
pickCheckerboard(Even, UUUmuEven, UUUmu);

View File

@ -0,0 +1,102 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: SchurDiagTwoKappa.h
Copyright (C) 2017
Author: Christoph Lehner
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef _SCHUR_DIAG_TWO_KAPPA_H
#define _SCHUR_DIAG_TWO_KAPPA_H
namespace Grid {
// This is specific to (Z)mobius fermions
template<class Matrix, class Field>
class KappaSimilarityTransform {
public:
INHERIT_IMPL_TYPES(Matrix);
std::vector<Coeff_t> kappa, kappaDag, kappaInv, kappaInvDag;
KappaSimilarityTransform (Matrix &zmob) {
for (int i=0;i<(int)zmob.bs.size();i++) {
Coeff_t k = 1.0 / ( 2.0 * (zmob.bs[i] *(4 - zmob.M5) + 1.0) );
kappa.push_back( k );
kappaDag.push_back( conj(k) );
kappaInv.push_back( 1.0 / k );
kappaInvDag.push_back( 1.0 / conj(k) );
}
}
template<typename vobj>
void sscale(const Lattice<vobj>& in, Lattice<vobj>& out, Coeff_t* s) {
GridBase *grid=out._grid;
out.checkerboard = in.checkerboard;
assert(grid->_simd_layout[0] == 1); // should be fine for ZMobius for now
int Ls = grid->_rdimensions[0];
parallel_for(int ss=0;ss<grid->oSites();ss++){
vobj tmp = s[ss % Ls]*in._odata[ss];
vstream(out._odata[ss],tmp);
}
}
RealD sscale_norm(const Field& in, Field& out, Coeff_t* s) {
sscale(in,out,s);
return norm2(out);
}
virtual RealD M (const Field& in, Field& out) { return sscale_norm(in,out,&kappa[0]); }
virtual RealD MDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaDag[0]);}
virtual RealD MInv (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInv[0]);}
virtual RealD MInvDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInvDag[0]);}
};
template<class Matrix,class Field>
class SchurDiagTwoKappaOperator : public SchurOperatorBase<Field> {
public:
KappaSimilarityTransform<Matrix, Field> _S;
SchurDiagTwoOperator<Matrix, Field> _Mat;
SchurDiagTwoKappaOperator (Matrix &Mat): _S(Mat), _Mat(Mat) {};
virtual RealD Mpc (const Field &in, Field &out) {
Field tmp(in._grid);
_S.MInv(in,out);
_Mat.Mpc(out,tmp);
return _S.M(tmp,out);
}
virtual RealD MpcDag (const Field &in, Field &out){
Field tmp(in._grid);
_S.MDag(in,out);
_Mat.MpcDag(out,tmp);
return _S.MInvDag(tmp,out);
}
};
}
#endif

View File

@ -33,228 +33,321 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
namespace Grid {
namespace QCD {
template<class SiteHalfSpinor,class SiteSpinor>
class WilsonCompressor {
public:
int mu;
int dag;
/////////////////////////////////////////////////////////////////////////////////////////////
// optimised versions supporting half precision too
/////////////////////////////////////////////////////////////////////////////////////////////
WilsonCompressor(int _dag){
mu=0;
dag=_dag;
assert((dag==0)||(dag==1));
}
void Point(int p) {
mu=p;
};
template<class _HCspinor,class _Hspinor,class _Spinor, class projector,typename SFINAE = void >
class WilsonCompressorTemplate;
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
SiteHalfSpinor ret;
int mudag=mu;
if (!dag) {
mudag=(mu+Nd)%(2*Nd);
}
switch(mudag) {
case Xp:
spProjXp(ret,in);
break;
case Yp:
spProjYp(ret,in);
break;
case Zp:
spProjZp(ret,in);
break;
case Tp:
spProjTp(ret,in);
break;
case Xm:
spProjXm(ret,in);
break;
case Ym:
spProjYm(ret,in);
break;
case Zm:
spProjZm(ret,in);
break;
case Tm:
spProjTm(ret,in);
break;
default:
assert(0);
break;
}
return ret;
}
};
/////////////////////////
// optimised versions
/////////////////////////
template<class _HCspinor,class _Hspinor,class _Spinor, class projector>
class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
typename std::enable_if<std::is_same<_HCspinor,_Hspinor>::value>::type >
{
public:
int mu,dag;
template<class SiteHalfSpinor,class SiteSpinor>
class WilsonXpCompressor {
public:
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
SiteHalfSpinor ret;
spProjXp(ret,in);
return ret;
}
};
template<class SiteHalfSpinor,class SiteSpinor>
class WilsonYpCompressor {
public:
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
SiteHalfSpinor ret;
spProjYp(ret,in);
return ret;
}
};
template<class SiteHalfSpinor,class SiteSpinor>
class WilsonZpCompressor {
public:
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
SiteHalfSpinor ret;
spProjZp(ret,in);
return ret;
}
};
template<class SiteHalfSpinor,class SiteSpinor>
class WilsonTpCompressor {
public:
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
SiteHalfSpinor ret;
spProjTp(ret,in);
return ret;
}
};
void Point(int p) { mu=p; };
template<class SiteHalfSpinor,class SiteSpinor>
class WilsonXmCompressor {
public:
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
SiteHalfSpinor ret;
spProjXm(ret,in);
return ret;
}
};
template<class SiteHalfSpinor,class SiteSpinor>
class WilsonYmCompressor {
public:
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
SiteHalfSpinor ret;
spProjYm(ret,in);
return ret;
}
};
template<class SiteHalfSpinor,class SiteSpinor>
class WilsonZmCompressor {
public:
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
SiteHalfSpinor ret;
spProjZm(ret,in);
return ret;
}
};
template<class SiteHalfSpinor,class SiteSpinor>
class WilsonTmCompressor {
public:
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
SiteHalfSpinor ret;
spProjTm(ret,in);
return ret;
}
};
WilsonCompressorTemplate(int _dag=0){
dag = _dag;
}
// Fast comms buffer manipulation which should inline right through (avoid direction
// dependent logic that prevents inlining
template<class vobj,class cobj>
class WilsonStencil : public CartesianStencil<vobj,cobj> {
public:
typedef _Spinor SiteSpinor;
typedef _Hspinor SiteHalfSpinor;
typedef _HCspinor SiteHalfCommSpinor;
typedef typename SiteHalfCommSpinor::vector_type vComplexLow;
typedef typename SiteHalfSpinor::vector_type vComplexHigh;
constexpr static int Nw=sizeof(SiteHalfSpinor)/sizeof(vComplexHigh);
typedef CartesianCommunicator::CommsRequest_t CommsRequest_t;
inline int CommDatumSize(void) {
return sizeof(SiteHalfCommSpinor);
}
WilsonStencil(GridBase *grid,
/*****************************************************/
/* Compress includes precision change if mpi data is not same */
/*****************************************************/
inline void Compress(SiteHalfSpinor *buf,Integer o,const SiteSpinor &in) {
projector::Proj(buf[o],in,mu,dag);
}
/*****************************************************/
/* Exchange includes precision change if mpi data is not same */
/*****************************************************/
inline void Exchange(SiteHalfSpinor *mp,
SiteHalfSpinor *vp0,
SiteHalfSpinor *vp1,
Integer type,Integer o){
exchange(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type);
}
/*****************************************************/
/* Have a decompression step if mpi data is not same */
/*****************************************************/
inline void Decompress(SiteHalfSpinor *out,
SiteHalfSpinor *in, Integer o) {
assert(0);
}
/*****************************************************/
/* Compress Exchange */
/*****************************************************/
inline void CompressExchange(SiteHalfSpinor *out0,
SiteHalfSpinor *out1,
const SiteSpinor *in,
Integer j,Integer k, Integer m,Integer type){
SiteHalfSpinor temp1, temp2,temp3,temp4;
projector::Proj(temp1,in[k],mu,dag);
projector::Proj(temp2,in[m],mu,dag);
exchange(out0[j],out1[j],temp1,temp2,type);
}
/*****************************************************/
/* Pass the info to the stencil */
/*****************************************************/
inline bool DecompressionStep(void) { return false; }
};
template<class _HCspinor,class _Hspinor,class _Spinor, class projector>
class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type >
{
public:
int mu,dag;
void Point(int p) { mu=p; };
WilsonCompressorTemplate(int _dag=0){
dag = _dag;
}
typedef _Spinor SiteSpinor;
typedef _Hspinor SiteHalfSpinor;
typedef _HCspinor SiteHalfCommSpinor;
typedef typename SiteHalfCommSpinor::vector_type vComplexLow;
typedef typename SiteHalfSpinor::vector_type vComplexHigh;
constexpr static int Nw=sizeof(SiteHalfSpinor)/sizeof(vComplexHigh);
inline int CommDatumSize(void) {
return sizeof(SiteHalfCommSpinor);
}
/*****************************************************/
/* Compress includes precision change if mpi data is not same */
/*****************************************************/
inline void Compress(SiteHalfSpinor *buf,Integer o,const SiteSpinor &in) {
SiteHalfSpinor hsp;
SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf;
projector::Proj(hsp,in,mu,dag);
precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw);
}
/*****************************************************/
/* Exchange includes precision change if mpi data is not same */
/*****************************************************/
inline void Exchange(SiteHalfSpinor *mp,
SiteHalfSpinor *vp0,
SiteHalfSpinor *vp1,
Integer type,Integer o){
SiteHalfSpinor vt0,vt1;
SiteHalfCommSpinor *vpp0 = (SiteHalfCommSpinor *)vp0;
SiteHalfCommSpinor *vpp1 = (SiteHalfCommSpinor *)vp1;
precisionChange((vComplexHigh *)&vt0,(vComplexLow *)&vpp0[o],Nw);
precisionChange((vComplexHigh *)&vt1,(vComplexLow *)&vpp1[o],Nw);
exchange(mp[2*o],mp[2*o+1],vt0,vt1,type);
}
/*****************************************************/
/* Have a decompression step if mpi data is not same */
/*****************************************************/
inline void Decompress(SiteHalfSpinor *out,
SiteHalfSpinor *in, Integer o){
SiteHalfCommSpinor *hin=(SiteHalfCommSpinor *)in;
precisionChange((vComplexHigh *)&out[o],(vComplexLow *)&hin[o],Nw);
}
/*****************************************************/
/* Compress Exchange */
/*****************************************************/
inline void CompressExchange(SiteHalfSpinor *out0,
SiteHalfSpinor *out1,
const SiteSpinor *in,
Integer j,Integer k, Integer m,Integer type){
SiteHalfSpinor temp1, temp2,temp3,temp4;
SiteHalfCommSpinor *hout0 = (SiteHalfCommSpinor *)out0;
SiteHalfCommSpinor *hout1 = (SiteHalfCommSpinor *)out1;
projector::Proj(temp1,in[k],mu,dag);
projector::Proj(temp2,in[m],mu,dag);
exchange(temp3,temp4,temp1,temp2,type);
precisionChange((vComplexLow *)&hout0[j],(vComplexHigh *)&temp3,Nw);
precisionChange((vComplexLow *)&hout1[j],(vComplexHigh *)&temp4,Nw);
}
/*****************************************************/
/* Pass the info to the stencil */
/*****************************************************/
inline bool DecompressionStep(void) { return true; }
};
#define DECLARE_PROJ(Projector,Compressor,spProj) \
class Projector { \
public: \
template<class hsp,class fsp> \
static void Proj(hsp &result,const fsp &in,int mu,int dag){ \
spProj(result,in); \
} \
}; \
template<typename HCS,typename HS,typename S> using Compressor = WilsonCompressorTemplate<HCS,HS,S,Projector>;
DECLARE_PROJ(WilsonXpProjector,WilsonXpCompressor,spProjXp);
DECLARE_PROJ(WilsonYpProjector,WilsonYpCompressor,spProjYp);
DECLARE_PROJ(WilsonZpProjector,WilsonZpCompressor,spProjZp);
DECLARE_PROJ(WilsonTpProjector,WilsonTpCompressor,spProjTp);
DECLARE_PROJ(WilsonXmProjector,WilsonXmCompressor,spProjXm);
DECLARE_PROJ(WilsonYmProjector,WilsonYmCompressor,spProjYm);
DECLARE_PROJ(WilsonZmProjector,WilsonZmCompressor,spProjZm);
DECLARE_PROJ(WilsonTmProjector,WilsonTmCompressor,spProjTm);
class WilsonProjector {
public:
template<class hsp,class fsp>
static void Proj(hsp &result,const fsp &in,int mu,int dag){
int mudag=dag? mu : (mu+Nd)%(2*Nd);
switch(mudag) {
case Xp: spProjXp(result,in); break;
case Yp: spProjYp(result,in); break;
case Zp: spProjZp(result,in); break;
case Tp: spProjTp(result,in); break;
case Xm: spProjXm(result,in); break;
case Ym: spProjYm(result,in); break;
case Zm: spProjZm(result,in); break;
case Tm: spProjTm(result,in); break;
default: assert(0); break;
}
}
};
template<typename HCS,typename HS,typename S> using WilsonCompressor = WilsonCompressorTemplate<HCS,HS,S,WilsonProjector>;
// Fast comms buffer manipulation which should inline right through (avoid direction
// dependent logic that prevents inlining
template<class vobj,class cobj>
class WilsonStencil : public CartesianStencil<vobj,cobj> {
public:
typedef CartesianCommunicator::CommsRequest_t CommsRequest_t;
std::vector<int> same_node;
std::vector<int> surface_list;
WilsonStencil(GridBase *grid,
int npoints,
int checkerboard,
const std::vector<int> &directions,
const std::vector<int> &distances) : CartesianStencil<vobj,cobj> (grid,npoints,checkerboard,directions,distances)
{ };
template < class compressor>
void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress)
{
std::vector<std::vector<CommsRequest_t> > reqs;
HaloExchangeOptGather(source,compress);
this->CommunicateBegin(reqs);
this->calls++;
this->CommunicateComplete(reqs);
this->CommsMerge();
}
template < class compressor>
void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress)
{
this->calls++;
this->Mergers.resize(0);
this->Packets.resize(0);
this->HaloGatherOpt(source,compress);
}
template < class compressor>
void HaloGatherOpt(const Lattice<vobj> &source,compressor &compress)
{
this->_grid->StencilBarrier();
// conformable(source._grid,_grid);
assert(source._grid==this->_grid);
this->halogtime-=usecond();
this->u_comm_offset=0;
int dag = compress.dag;
WilsonXpCompressor<cobj,vobj> XpCompress;
WilsonYpCompressor<cobj,vobj> YpCompress;
WilsonZpCompressor<cobj,vobj> ZpCompress;
WilsonTpCompressor<cobj,vobj> TpCompress;
WilsonXmCompressor<cobj,vobj> XmCompress;
WilsonYmCompressor<cobj,vobj> YmCompress;
WilsonZmCompressor<cobj,vobj> ZmCompress;
WilsonTmCompressor<cobj,vobj> TmCompress;
// Gather all comms buffers
// for(int point = 0 ; point < _npoints; point++) {
// compress.Point(point);
// HaloGatherDir(source,compress,point,face_idx);
// }
int face_idx=0;
if ( dag ) {
// std::cout << " Optimised Dagger compress " <<std::endl;
this->HaloGatherDir(source,XpCompress,Xp,face_idx);
this->HaloGatherDir(source,YpCompress,Yp,face_idx);
this->HaloGatherDir(source,ZpCompress,Zp,face_idx);
this->HaloGatherDir(source,TpCompress,Tp,face_idx);
this->HaloGatherDir(source,XmCompress,Xm,face_idx);
this->HaloGatherDir(source,YmCompress,Ym,face_idx);
this->HaloGatherDir(source,ZmCompress,Zm,face_idx);
this->HaloGatherDir(source,TmCompress,Tm,face_idx);
} else {
this->HaloGatherDir(source,XmCompress,Xp,face_idx);
this->HaloGatherDir(source,YmCompress,Yp,face_idx);
this->HaloGatherDir(source,ZmCompress,Zp,face_idx);
this->HaloGatherDir(source,TmCompress,Tp,face_idx);
this->HaloGatherDir(source,XpCompress,Xm,face_idx);
this->HaloGatherDir(source,YpCompress,Ym,face_idx);
this->HaloGatherDir(source,ZpCompress,Zm,face_idx);
this->HaloGatherDir(source,TpCompress,Tm,face_idx);
}
this->face_table_computed=1;
assert(this->u_comm_offset==this->_unified_buffer_size);
this->halogtime+=usecond();
}
const std::vector<int> &distances)
: CartesianStencil<vobj,cobj> (grid,npoints,checkerboard,directions,distances) ,
same_node(npoints)
{
surface_list.resize(0);
};
void BuildSurfaceList(int Ls,int vol4){
// find same node for SHM
// Here we know the distance is 1 for WilsonStencil
for(int point=0;point<this->_npoints;point++){
same_node[point] = this->SameNode(point);
// std::cout << " dir " <<point<<" same_node " <<same_node[point]<<std::endl;
}
for(int site = 0 ;site< vol4;site++){
int local = 1;
for(int point=0;point<this->_npoints;point++){
if( (!this->GetNodeLocal(site*Ls,point)) && (!same_node[point]) ){
local = 0;
}
}
if(local == 0) {
surface_list.push_back(site);
}
}
}
template < class compressor>
void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress)
{
std::vector<std::vector<CommsRequest_t> > reqs;
this->HaloExchangeOptGather(source,compress);
this->CommunicateBegin(reqs);
this->CommunicateComplete(reqs);
this->CommsMerge(compress);
this->CommsMergeSHM(compress);
}
template <class compressor>
void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress)
{
this->Prepare();
this->HaloGatherOpt(source,compress);
}
template <class compressor>
void HaloGatherOpt(const Lattice<vobj> &source,compressor &compress)
{
// Strategy. Inherit types from Compressor.
// Use types to select the write direction by directon compressor
typedef typename compressor::SiteSpinor SiteSpinor;
typedef typename compressor::SiteHalfSpinor SiteHalfSpinor;
typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor;
this->_grid->StencilBarrier();
assert(source._grid==this->_grid);
this->halogtime-=usecond();
this->u_comm_offset=0;
WilsonXpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> XpCompress;
WilsonYpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> YpCompress;
WilsonZpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> ZpCompress;
WilsonTpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> TpCompress;
WilsonXmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> XmCompress;
WilsonYmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> YmCompress;
WilsonZmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> ZmCompress;
WilsonTmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> TmCompress;
int dag = compress.dag;
int face_idx=0;
if ( dag ) {
// std::cout << " Optimised Dagger compress " <<std::endl;
assert(same_node[Xp]==this->HaloGatherDir(source,XpCompress,Xp,face_idx));
assert(same_node[Yp]==this->HaloGatherDir(source,YpCompress,Yp,face_idx));
assert(same_node[Zp]==this->HaloGatherDir(source,ZpCompress,Zp,face_idx));
assert(same_node[Tp]==this->HaloGatherDir(source,TpCompress,Tp,face_idx));
assert(same_node[Xm]==this->HaloGatherDir(source,XmCompress,Xm,face_idx));
assert(same_node[Ym]==this->HaloGatherDir(source,YmCompress,Ym,face_idx));
assert(same_node[Zm]==this->HaloGatherDir(source,ZmCompress,Zm,face_idx));
assert(same_node[Tm]==this->HaloGatherDir(source,TmCompress,Tm,face_idx));
} else {
assert(same_node[Xp]==this->HaloGatherDir(source,XmCompress,Xp,face_idx));
assert(same_node[Yp]==this->HaloGatherDir(source,YmCompress,Yp,face_idx));
assert(same_node[Zp]==this->HaloGatherDir(source,ZmCompress,Zp,face_idx));
assert(same_node[Tp]==this->HaloGatherDir(source,TmCompress,Tp,face_idx));
assert(same_node[Xm]==this->HaloGatherDir(source,XpCompress,Xm,face_idx));
assert(same_node[Ym]==this->HaloGatherDir(source,YpCompress,Ym,face_idx));
assert(same_node[Zm]==this->HaloGatherDir(source,ZpCompress,Zm,face_idx));
assert(same_node[Tm]==this->HaloGatherDir(source,TpCompress,Tm,face_idx));
}
this->face_table_computed=1;
assert(this->u_comm_offset==this->_unified_buffer_size);
this->halogtime+=usecond();
}
};
}} // namespace close
#endif

View File

@ -230,8 +230,7 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
}
template <class Impl>
void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U,
const FermionField &V, int dag) {
void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
conformable(U._grid, _grid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
@ -242,12 +241,12 @@ void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U,
}
template <class Impl>
void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U,
const FermionField &V, int dag) {
void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
conformable(U._grid, _cbgrid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
//conformable(U._grid, mat._grid); not general, leaving as a comment (Guido)
// Motivation: look at the SchurDiff operator
assert(V.checkerboard == Even);
assert(U.checkerboard == Odd);
mat.checkerboard = Odd;
@ -256,11 +255,10 @@ void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U,
}
template <class Impl>
void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U,
const FermionField &V, int dag) {
void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
conformable(U._grid, _cbgrid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
//conformable(U._grid, mat._grid);
assert(V.checkerboard == Odd);
assert(U.checkerboard == Even);

View File

@ -11,6 +11,7 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -117,49 +118,19 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
// Allocate the required comms buffer
ImportGauge(_Umu);
// Build lists of exterior only nodes
int LLs = FiveDimGrid._rdimensions[0];
int vol4;
vol4=FourDimGrid.oSites();
Stencil.BuildSurfaceList(LLs,vol4);
vol4=FourDimRedBlackGrid.oSites();
StencilEven.BuildSurfaceList(LLs,vol4);
StencilOdd.BuildSurfaceList(LLs,vol4);
std::cout << GridLogMessage << " SurfaceLists "<< Stencil.surface_list.size()
<<" " << StencilEven.surface_list.size()<<std::endl;
}
/*
template<class Impl>
WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
RealD _M5,const ImplParams &p) :
{
int nsimd = Simd::Nsimd();
// some assertions
assert(FiveDimGrid._ndimension==5);
assert(FiveDimRedBlackGrid._ndimension==5);
assert(FiveDimRedBlackGrid._checker_dim==0); // Checkerboard the s-direction
assert(FourDimGrid._ndimension==4);
// Dimension zero of the five-d is the Ls direction
Ls=FiveDimGrid._fdimensions[0];
assert(FiveDimGrid._processors[0] ==1);
assert(FiveDimGrid._simd_layout[0] ==nsimd);
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
assert(FiveDimRedBlackGrid._processors[0] ==1);
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
// Other dimensions must match the decomposition of the four-D fields
for(int d=0;d<4;d++){
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
assert(FourDimGrid._simd_layout[d]=1);
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
}
{
}
}
*/
template<class Impl>
void WilsonFermion5D<Impl>::Report(void)
@ -296,6 +267,8 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
DerivCommTime+=usecond();
Atilde=A;
int LLs = B._grid->_rdimensions[0];
DerivComputeTime-=usecond();
for (int mu = 0; mu < Nd; mu++) {
@ -325,6 +298,9 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
////////////////////////////
}
}
////////////////////////////
// spin trace outer product
////////////////////////////
DerivDhopComputeTime += usecond();
Impl::InsertForce5D(mat, Btilde, Atilde, mu);
}
@ -333,13 +309,14 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
template<class Impl>
void WilsonFermion5D<Impl>::DhopDeriv(GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
const FermionField &A,
const FermionField &B,
int dag)
{
conformable(A._grid,FermionGrid());
conformable(A._grid,B._grid);
conformable(GaugeGrid(),mat._grid);
//conformable(GaugeGrid(),mat._grid);// this is not general! leaving as a comment
mat.checkerboard = A.checkerboard;
@ -348,12 +325,11 @@ void WilsonFermion5D<Impl>::DhopDeriv(GaugeField &mat,
template<class Impl>
void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
const FermionField &A,
const FermionField &B,
int dag)
{
conformable(A._grid,FermionRedBlackGrid());
conformable(GaugeRedBlackGrid(),mat._grid);
conformable(A._grid,B._grid);
assert(B.checkerboard==Odd);
@ -366,12 +342,11 @@ void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
template<class Impl>
void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag)
const FermionField &A,
const FermionField &B,
int dag)
{
conformable(A._grid,FermionRedBlackGrid());
conformable(GaugeRedBlackGrid(),mat._grid);
conformable(A._grid,B._grid);
assert(B.checkerboard==Even);
@ -383,8 +358,8 @@ void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
template<class Impl>
void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag)
DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag)
{
DhopTotalTime-=usecond();
#ifdef GRID_OMP
@ -396,6 +371,7 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
DhopTotalTime+=usecond();
}
template<class Impl>
void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, LebesgueOrder &lo,
DoubledGaugeField & U,
@ -409,12 +385,21 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
int LLs = in._grid->_rdimensions[0];
int len = U._grid->oSites();
DhopFaceTime-=usecond();
st.HaloExchangeOptGather(in,compressor);
DhopFaceTime+=usecond();
std::vector<std::vector<CommsRequest_t> > reqs;
// Rely on async comms; start comms before merge of local data
DhopCommTime-=usecond();
st.CommunicateBegin(reqs);
DhopFaceTime-=usecond();
st.CommsMergeSHM(compressor);
DhopFaceTime+=usecond();
// Perhaps use omp task and region
#pragma omp parallel
{
int nthreads = omp_get_num_threads();
@ -425,8 +410,6 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
int sF = LLs * myoff;
if ( me == 0 ) {
DhopCommTime-=usecond();
st.CommunicateBegin(reqs);
st.CommunicateComplete(reqs);
DhopCommTime+=usecond();
} else {
@ -439,28 +422,37 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
}
DhopFaceTime-=usecond();
st.CommsMerge();
st.CommsMerge(compressor);
DhopFaceTime+=usecond();
#pragma omp parallel
{
int nthreads = omp_get_num_threads();
int me = omp_get_thread_num();
int myoff, mywork;
GridThread::GetWork(len,me,mywork,myoff,nthreads);
int sF = LLs * myoff;
// Exterior links in stencil
if ( me==0 ) DhopComputeTime2-=usecond();
if (dag == DaggerYes) Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,0,1);
else Kernels::DhopSite (st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,0,1);
if ( me==0 ) DhopComputeTime2+=usecond();
}// end parallel region
// Load imbalance alert. Should use dynamic schedule OMP for loop
// Perhaps create a list of only those sites with face work, and
// load balance process the list.
DhopComputeTime2-=usecond();
if (dag == DaggerYes) {
int sz=st.surface_list.size();
parallel_for (int ss = 0; ss < sz; ss++) {
int sU = st.surface_list[ss];
int sF = LLs * sU;
Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
}
} else {
int sz=st.surface_list.size();
parallel_for (int ss = 0; ss < sz; ss++) {
int sU = st.surface_list[ss];
int sF = LLs * sU;
Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
}
}
DhopComputeTime2+=usecond();
#else
assert(0);
#endif
}
template<class Impl>
void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo,
DoubledGaugeField & U,
@ -679,7 +671,6 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const Fe
}
FermOpTemplateInstantiate(WilsonFermion5D);
GparityFermOpTemplateInstantiate(WilsonFermion5D);

View File

@ -33,52 +33,8 @@ directory
namespace Grid {
namespace QCD {
int WilsonKernelsStatic::Opt = WilsonKernelsStatic::OptGeneric;
int WilsonKernelsStatic::Comms = WilsonKernelsStatic::CommsAndCompute;
#ifdef QPX
#include <spi/include/kernel/location.h>
#include <spi/include/l1p/types.h>
#include <hwi/include/bqc/l1p_mmio.h>
#include <hwi/include/bqc/A2_inlines.h>
#endif
void bgq_l1p_optimisation(int mode)
{
#ifdef QPX
#undef L1P_CFG_PF_USR
#define L1P_CFG_PF_USR (0x3fde8000108ll) /* (64 bit reg, 23 bits wide, user/unpriv) */
uint64_t cfg_pf_usr;
if ( mode ) {
cfg_pf_usr =
L1P_CFG_PF_USR_ifetch_depth(0)
| L1P_CFG_PF_USR_ifetch_max_footprint(1)
| L1P_CFG_PF_USR_pf_stream_est_on_dcbt
| L1P_CFG_PF_USR_pf_stream_establish_enable
| L1P_CFG_PF_USR_pf_stream_optimistic
| L1P_CFG_PF_USR_pf_adaptive_throttle(0xF) ;
// if ( sizeof(Float) == sizeof(double) ) {
cfg_pf_usr |= L1P_CFG_PF_USR_dfetch_depth(2)| L1P_CFG_PF_USR_dfetch_max_footprint(3) ;
// } else {
// cfg_pf_usr |= L1P_CFG_PF_USR_dfetch_depth(1)| L1P_CFG_PF_USR_dfetch_max_footprint(2) ;
// }
} else {
cfg_pf_usr = L1P_CFG_PF_USR_dfetch_depth(1)
| L1P_CFG_PF_USR_dfetch_max_footprint(2)
| L1P_CFG_PF_USR_ifetch_depth(0)
| L1P_CFG_PF_USR_ifetch_max_footprint(1)
| L1P_CFG_PF_USR_pf_stream_est_on_dcbt
| L1P_CFG_PF_USR_pf_stream_establish_enable
| L1P_CFG_PF_USR_pf_stream_optimistic
| L1P_CFG_PF_USR_pf_stream_prefetch_enable;
}
*((uint64_t *)L1P_CFG_PF_USR) = cfg_pf_usr;
#endif
}
int WilsonKernelsStatic::Opt = WilsonKernelsStatic::OptGeneric;
int WilsonKernelsStatic::Comms = WilsonKernelsStatic::CommsAndCompute;
template <class Impl>
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
@ -86,12 +42,72 @@ WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
////////////////////////////////////////////
// Generic implementation; move to different file?
////////////////////////////////////////////
#define GENERIC_STENCIL_LEG(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if (SE->_is_local) { \
chi_p = &chi; \
if (SE->_permute) { \
spProj(tmp, in._odata[SE->_offset]); \
permute(chi, tmp, ptype); \
} else { \
spProj(chi, in._odata[SE->_offset]); \
} \
} else { \
chi_p = &buf[SE->_offset]; \
} \
Impl::multLink(Uchi, U._odata[sU], *chi_p, Dir, SE, st); \
Recon(result, Uchi);
#define GENERIC_STENCIL_LEG_INT(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if (SE->_is_local) { \
chi_p = &chi; \
if (SE->_permute) { \
spProj(tmp, in._odata[SE->_offset]); \
permute(chi, tmp, ptype); \
} else { \
spProj(chi, in._odata[SE->_offset]); \
} \
} else if ( st.same_node[Dir] ) { \
chi_p = &buf[SE->_offset]; \
} \
if (SE->_is_local || st.same_node[Dir] ) { \
Impl::multLink(Uchi, U._odata[sU], *chi_p, Dir, SE, st); \
Recon(result, Uchi); \
}
#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \
SE = st.GetEntry(ptype, Dir, sF); \
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
chi_p = &buf[SE->_offset]; \
Impl::multLink(Uchi, U._odata[sU], *chi_p, Dir, SE, st); \
Recon(result, Uchi); \
nmu++; \
}
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
if (gamma == Dir) { \
if (SE->_is_local && SE->_permute) { \
spProj(tmp, in._odata[SE->_offset]); \
permute(chi, tmp, ptype); \
} else if (SE->_is_local) { \
spProj(chi, in._odata[SE->_offset]); \
} else { \
chi = buf[SE->_offset]; \
} \
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); \
Recon(result, Uchi); \
}
////////////////////////////////////////////////////////////////////
// All legs kernels ; comms then compute
////////////////////////////////////////////////////////////////////
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out,
int interior,int exterior) {
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
@ -100,174 +116,22 @@ void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo,
StencilEntry *SE;
int ptype;
///////////////////////////
// Xp
///////////////////////////
SE = st.GetEntry(ptype, Xp, sF);
if (SE->_is_local) {
chi_p = &chi;
if (SE->_permute) {
spProjXp(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else {
spProjXp(chi, in._odata[SE->_offset]);
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp, SE, st);
spReconXp(result, Uchi);
///////////////////////////
// Yp
///////////////////////////
SE = st.GetEntry(ptype, Yp, sF);
if (SE->_is_local) {
chi_p = &chi;
if (SE->_permute) {
spProjYp(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else {
spProjYp(chi, in._odata[SE->_offset]);
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], *chi_p, Yp, SE, st);
accumReconYp(result, Uchi);
///////////////////////////
// Zp
///////////////////////////
SE = st.GetEntry(ptype, Zp, sF);
if (SE->_is_local) {
chi_p = &chi;
if (SE->_permute) {
spProjZp(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else {
spProjZp(chi, in._odata[SE->_offset]);
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], *chi_p, Zp, SE, st);
accumReconZp(result, Uchi);
///////////////////////////
// Tp
///////////////////////////
SE = st.GetEntry(ptype, Tp, sF);
if (SE->_is_local) {
chi_p = &chi;
if (SE->_permute) {
spProjTp(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else {
spProjTp(chi, in._odata[SE->_offset]);
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], *chi_p, Tp, SE, st);
accumReconTp(result, Uchi);
///////////////////////////
// Xm
///////////////////////////
SE = st.GetEntry(ptype, Xm, sF);
if (SE->_is_local) {
chi_p = &chi;
if (SE->_permute) {
spProjXm(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else {
spProjXm(chi, in._odata[SE->_offset]);
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], *chi_p, Xm, SE, st);
accumReconXm(result, Uchi);
///////////////////////////
// Ym
///////////////////////////
SE = st.GetEntry(ptype, Ym, sF);
if (SE->_is_local) {
chi_p = &chi;
if (SE->_permute) {
spProjYm(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else {
spProjYm(chi, in._odata[SE->_offset]);
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], *chi_p, Ym, SE, st);
accumReconYm(result, Uchi);
///////////////////////////
// Zm
///////////////////////////
SE = st.GetEntry(ptype, Zm, sF);
if (SE->_is_local) {
chi_p = &chi;
if (SE->_permute) {
spProjZm(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else {
spProjZm(chi, in._odata[SE->_offset]);
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], *chi_p, Zm, SE, st);
accumReconZm(result, Uchi);
///////////////////////////
// Tm
///////////////////////////
SE = st.GetEntry(ptype, Tm, sF);
if (SE->_is_local) {
chi_p = &chi;
if (SE->_permute) {
spProjTm(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else {
spProjTm(chi, in._odata[SE->_offset]);
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], *chi_p, Tm, SE, st);
accumReconTm(result, Uchi);
GENERIC_STENCIL_LEG(Xp,spProjXp,spReconXp);
GENERIC_STENCIL_LEG(Yp,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG(Zp,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG(Tp,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG(Xm,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG(Ym,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG(Zm,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG(Tm,spProjTm,accumReconTm);
vstream(out._odata[sF], result);
};
// Need controls to do interior, exterior, or both
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out,int interior,int exterior) {
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
@ -276,168 +140,123 @@ void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, Do
StencilEntry *SE;
int ptype;
///////////////////////////
// Xp
///////////////////////////
SE = st.GetEntry(ptype, Xm, sF);
if (SE->_is_local) {
chi_p = &chi;
if (SE->_permute) {
spProjXp(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else {
spProjXp(chi, in._odata[SE->_offset]);
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], *chi_p, Xm, SE, st);
spReconXp(result, Uchi);
///////////////////////////
// Yp
///////////////////////////
SE = st.GetEntry(ptype, Ym, sF);
if (SE->_is_local) {
chi_p = &chi;
if (SE->_permute) {
spProjYp(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else {
spProjYp(chi, in._odata[SE->_offset]);
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], *chi_p, Ym, SE, st);
accumReconYp(result, Uchi);
///////////////////////////
// Zp
///////////////////////////
SE = st.GetEntry(ptype, Zm, sF);
if (SE->_is_local) {
chi_p = &chi;
if (SE->_permute) {
spProjZp(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else {
spProjZp(chi, in._odata[SE->_offset]);
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], *chi_p, Zm, SE, st);
accumReconZp(result, Uchi);
///////////////////////////
// Tp
///////////////////////////
SE = st.GetEntry(ptype, Tm, sF);
if (SE->_is_local) {
chi_p = &chi;
if (SE->_permute) {
spProjTp(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else {
spProjTp(chi, in._odata[SE->_offset]);
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], *chi_p, Tm, SE, st);
accumReconTp(result, Uchi);
///////////////////////////
// Xm
///////////////////////////
SE = st.GetEntry(ptype, Xp, sF);
if (SE->_is_local) {
chi_p = &chi;
if (SE->_permute) {
spProjXm(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else {
spProjXm(chi, in._odata[SE->_offset]);
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp, SE, st);
accumReconXm(result, Uchi);
///////////////////////////
// Ym
///////////////////////////
SE = st.GetEntry(ptype, Yp, sF);
if (SE->_is_local) {
chi_p = &chi;
if (SE->_permute) {
spProjYm(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else {
spProjYm(chi, in._odata[SE->_offset]);
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], *chi_p, Yp, SE, st);
accumReconYm(result, Uchi);
///////////////////////////
// Zm
///////////////////////////
SE = st.GetEntry(ptype, Zp, sF);
if (SE->_is_local) {
chi_p = &chi;
if (SE->_permute) {
spProjZm(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else {
spProjZm(chi, in._odata[SE->_offset]);
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], *chi_p, Zp, SE, st);
accumReconZm(result, Uchi);
///////////////////////////
// Tm
///////////////////////////
SE = st.GetEntry(ptype, Tp, sF);
if (SE->_is_local) {
chi_p = &chi;
if (SE->_permute) {
spProjTm(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else {
spProjTm(chi, in._odata[SE->_offset]);
}
} else {
chi_p = &buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], *chi_p, Tp, SE, st);
accumReconTm(result, Uchi);
GENERIC_STENCIL_LEG(Xm,spProjXp,spReconXp);
GENERIC_STENCIL_LEG(Ym,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG(Zm,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG(Tm,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG(Xp,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG(Yp,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG(Zp,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG(Tp,spProjTm,accumReconTm);
vstream(out._odata[sF], result);
};
////////////////////////////////////////////////////////////////////
// Interior kernels
////////////////////////////////////////////////////////////////////
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
SiteHalfSpinor Uchi;
SiteSpinor result;
StencilEntry *SE;
int ptype;
result=zero;
GENERIC_STENCIL_LEG_INT(Xp,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_INT(Yp,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_INT(Zp,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_INT(Tp,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_INT(Xm,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_INT(Ym,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_INT(Zm,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_INT(Tm,spProjTm,accumReconTm);
vstream(out._odata[sF], result);
};
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
SiteHalfSpinor Uchi;
SiteSpinor result;
StencilEntry *SE;
int ptype;
result=zero;
GENERIC_STENCIL_LEG_INT(Xm,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_INT(Ym,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_INT(Zm,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_INT(Tm,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_INT(Xp,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_INT(Yp,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_INT(Zp,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_INT(Tp,spProjTm,accumReconTm);
vstream(out._odata[sF], result);
};
////////////////////////////////////////////////////////////////////
// Exterior kernels
////////////////////////////////////////////////////////////////////
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
SiteHalfSpinor Uchi;
SiteSpinor result;
StencilEntry *SE;
int ptype;
int nmu=0;
result=zero;
GENERIC_STENCIL_LEG_EXT(Xp,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_EXT(Yp,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_EXT(Zp,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_EXT(Tp,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_EXT(Xm,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_EXT(Ym,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_EXT(Zm,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_EXT(Tm,spProjTm,accumReconTm);
if ( nmu ) {
out._odata[sF] = out._odata[sF] + result;
}
};
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
SiteHalfSpinor Uchi;
SiteSpinor result;
StencilEntry *SE;
int ptype;
int nmu=0;
result=zero;
GENERIC_STENCIL_LEG_EXT(Xm,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_EXT(Ym,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_EXT(Zm,spProjZp,accumReconZp);
GENERIC_STENCIL_LEG_EXT(Tm,spProjTp,accumReconTp);
GENERIC_STENCIL_LEG_EXT(Xp,spProjXm,accumReconXm);
GENERIC_STENCIL_LEG_EXT(Yp,spProjYm,accumReconYm);
GENERIC_STENCIL_LEG_EXT(Zp,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_EXT(Tp,spProjTm,accumReconTm);
if ( nmu ) {
out._odata[sF] = out._odata[sF] + result;
}
};
template <class Impl>
void WilsonKernels<Impl>::DhopDir( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF,
@ -451,119 +270,14 @@ void WilsonKernels<Impl>::DhopDir( StencilImpl &st, DoubledGaugeField &U,SiteHal
int ptype;
SE = st.GetEntry(ptype, dir, sF);
// Xp
if (gamma == Xp) {
if (SE->_is_local && SE->_permute) {
spProjXp(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else if (SE->_is_local) {
spProjXp(chi, in._odata[SE->_offset]);
} else {
chi = buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
spReconXp(result, Uchi);
}
// Yp
if (gamma == Yp) {
if (SE->_is_local && SE->_permute) {
spProjYp(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else if (SE->_is_local) {
spProjYp(chi, in._odata[SE->_offset]);
} else {
chi = buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
spReconYp(result, Uchi);
}
// Zp
if (gamma == Zp) {
if (SE->_is_local && SE->_permute) {
spProjZp(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else if (SE->_is_local) {
spProjZp(chi, in._odata[SE->_offset]);
} else {
chi = buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
spReconZp(result, Uchi);
}
// Tp
if (gamma == Tp) {
if (SE->_is_local && SE->_permute) {
spProjTp(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else if (SE->_is_local) {
spProjTp(chi, in._odata[SE->_offset]);
} else {
chi = buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
spReconTp(result, Uchi);
}
// Xm
if (gamma == Xm) {
if (SE->_is_local && SE->_permute) {
spProjXm(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else if (SE->_is_local) {
spProjXm(chi, in._odata[SE->_offset]);
} else {
chi = buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
spReconXm(result, Uchi);
}
// Ym
if (gamma == Ym) {
if (SE->_is_local && SE->_permute) {
spProjYm(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else if (SE->_is_local) {
spProjYm(chi, in._odata[SE->_offset]);
} else {
chi = buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
spReconYm(result, Uchi);
}
// Zm
if (gamma == Zm) {
if (SE->_is_local && SE->_permute) {
spProjZm(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else if (SE->_is_local) {
spProjZm(chi, in._odata[SE->_offset]);
} else {
chi = buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
spReconZm(result, Uchi);
}
// Tm
if (gamma == Tm) {
if (SE->_is_local && SE->_permute) {
spProjTm(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else if (SE->_is_local) {
spProjTm(chi, in._odata[SE->_offset]);
} else {
chi = buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
spReconTm(result, Uchi);
}
GENERIC_DHOPDIR_LEG(Xp,spProjXp,spReconXp);
GENERIC_DHOPDIR_LEG(Yp,spProjYp,spReconYp);
GENERIC_DHOPDIR_LEG(Zp,spProjZp,spReconZp);
GENERIC_DHOPDIR_LEG(Tp,spProjTp,spReconTp);
GENERIC_DHOPDIR_LEG(Xm,spProjXm,spReconXm);
GENERIC_DHOPDIR_LEG(Ym,spProjYm,spReconYm);
GENERIC_DHOPDIR_LEG(Zm,spProjZm,spReconZm);
GENERIC_DHOPDIR_LEG(Tm,spProjTm,spReconTm);
vstream(out._odata[sF], result);
}

View File

@ -34,8 +34,6 @@ directory
namespace Grid {
namespace QCD {
void bgq_l1p_optimisation(int mode);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Helper routines that implement Wilson stencil for a single site.
// Common to both the WilsonFermion and WilsonFermion5D
@ -44,9 +42,8 @@ class WilsonKernelsStatic {
public:
enum { OptGeneric, OptHandUnroll, OptInlineAsm };
enum { CommsAndCompute, CommsThenCompute };
// S-direction is INNERMOST and takes no part in the parity.
static int Opt; // these are a temporary hack
static int Comms; // these are a temporary hack
static int Opt;
static int Comms;
};
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic {
@ -66,7 +63,7 @@ public:
switch(Opt) {
#if defined(AVX512) || defined (QPX)
case OptInlineAsm:
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSite(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSite (st,lo,U,buf,sF,sU,Ls,Ns,in,out);
else if (interior) WilsonKernels<Impl>::AsmDhopSiteInt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
else if (exterior) WilsonKernels<Impl>::AsmDhopSiteExt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
else assert(0);
@ -75,7 +72,9 @@ public:
case OptHandUnroll:
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if( exterior) WilsonKernels<Impl>::HandDhopSite(st,lo,U,buf,sF,sU,in,out,interior,exterior);
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSite(st,lo,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::HandDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::HandDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
sF++;
}
sU++;
@ -84,7 +83,10 @@ public:
case OptGeneric:
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if( exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out,interior,exterior);
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
else assert(0);
sF++;
}
sU++;
@ -99,11 +101,14 @@ public:
template <bool EnableBool = true>
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1 ) {
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1 ) {
// no kernel choice
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if( exterior) WilsonKernels<Impl>::GenericDhopSite(st, lo, U, buf, sF, sU, in, out,interior,exterior);
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
else assert(0);
sF++;
}
sU++;
@ -113,13 +118,13 @@ public:
template <bool EnableBool = true>
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,void>::type
DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1) {
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
{
bgq_l1p_optimisation(1);
switch(Opt) {
#if defined(AVX512) || defined (QPX)
case OptInlineAsm:
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSiteDag(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSiteDag (st,lo,U,buf,sF,sU,Ls,Ns,in,out);
else if (interior) WilsonKernels<Impl>::AsmDhopSiteDagInt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
else if (exterior) WilsonKernels<Impl>::AsmDhopSiteDagExt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
else assert(0);
@ -128,7 +133,10 @@ public:
case OptHandUnroll:
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if( exterior) WilsonKernels<Impl>::HandDhopSiteDag(st,lo,U,buf,sF,sU,in,out,interior,exterior);
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::HandDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::HandDhopSiteDagExt(st,lo,U,buf,sF,sU,in,out);
else assert(0);
sF++;
}
sU++;
@ -137,7 +145,10 @@ public:
case OptGeneric:
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if( exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out,interior,exterior);
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::GenericDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteDagExt(st,lo,U,buf,sF,sU,in,out);
else assert(0);
sF++;
}
sU++;
@ -156,7 +167,10 @@ public:
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if( exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out,interior,exterior);
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
else if (interior) WilsonKernels<Impl>::GenericDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out);
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteDagExt(st,lo,U,buf,sF,sU,in,out);
else assert(0);
sF++;
}
sU++;
@ -169,36 +183,60 @@ public:
private:
// Specialised variants
void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out,int interior,int exterior);
int sF, int sU, const FermionField &in, FermionField &out);
void GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out,int interior,int exterior);
int sF, int sU, const FermionField &in, FermionField &out);
void GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void AsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
void AsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
void AsmDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
void AsmDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
void AsmDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
void AsmDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
void HandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out,int interior,int exterior);
int sF, int sU, const FermionField &in, FermionField &out);
void HandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out,int interior,int exterior);
int sF, int sU, const FermionField &in, FermionField &out);
void HandDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void HandDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void HandDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void HandDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
public:
WilsonKernels(const ImplParams &p = ImplParams());

View File

@ -112,5 +112,16 @@ INSTANTIATE_ASM(DomainWallVec5dImplD);
INSTANTIATE_ASM(ZDomainWallVec5dImplF);
INSTANTIATE_ASM(ZDomainWallVec5dImplD);
INSTANTIATE_ASM(WilsonImplFH);
INSTANTIATE_ASM(WilsonImplDF);
INSTANTIATE_ASM(ZWilsonImplFH);
INSTANTIATE_ASM(ZWilsonImplDF);
INSTANTIATE_ASM(GparityWilsonImplFH);
INSTANTIATE_ASM(GparityWilsonImplDF);
INSTANTIATE_ASM(DomainWallVec5dImplFH);
INSTANTIATE_ASM(DomainWallVec5dImplDF);
INSTANTIATE_ASM(ZDomainWallVec5dImplFH);
INSTANTIATE_ASM(ZDomainWallVec5dImplDF);
}}

View File

@ -71,6 +71,16 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,Doub
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
@ -84,6 +94,16 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,D
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
@ -97,6 +117,16 @@ template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, single
@ -115,6 +145,16 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,D
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
@ -128,6 +168,16 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & l
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
@ -141,6 +191,16 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & l
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef MAYBEPERM
#undef MULT_2SPIN
#define MAYBEPERM(A,B)
@ -162,6 +222,15 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSite(StencilImpl &st,LebesgueOrder
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
@ -174,6 +243,15 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrd
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
@ -189,6 +267,16 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrd
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// Ls vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
@ -205,6 +293,15 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrd
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
@ -217,6 +314,15 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagInt(StencilImpl &st,Lebesgue
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
@ -229,6 +335,15 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagExt(StencilImpl &st,Lebesgue
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef COMPLEX_SIGNS
#undef MAYBEPERM
#undef MULT_2SPIN
@ -269,6 +384,15 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,Doub
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
@ -281,6 +405,15 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,D
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
@ -293,6 +426,15 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,D
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
@ -309,6 +451,15 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,D
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
@ -321,6 +472,15 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & l
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
@ -333,6 +493,15 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & l
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef MAYBEPERM
#undef MULT_2SPIN
#define MAYBEPERM(A,B)
@ -354,6 +523,15 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSite(StencilImpl &st,LebesgueOrder
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
@ -366,6 +544,15 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrd
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
@ -380,6 +567,15 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrd
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
/////////////////////////////////////////////////////////////////
// Ls vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
@ -396,6 +592,15 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrd
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
@ -408,6 +613,15 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagInt(StencilImpl &st,Lebesgue
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
@ -420,6 +634,15 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagExt(StencilImpl &st,Lebesgue
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef COMPLEX_SIGNS
#undef MAYBEPERM
#undef MULT_2SPIN

View File

@ -39,24 +39,26 @@
////////////////////////////////////////////////////////////////////////////////
#ifdef INTERIOR_AND_EXTERIOR
#define ZERO_NMU(A)
#define INTERIOR_BLOCK_XP(a,b,PERMUTE_DIR,PROJMEM,RECON) INTERIOR_BLOCK(a,b,PERMUTE_DIR,PROJMEM,RECON)
#define EXTERIOR_BLOCK_XP(a,b,RECON) EXTERIOR_BLOCK(a,b,RECON)
#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
basep = st.GetPFInfo(nent,plocal); nent++; \
if ( local ) { \
LOAD64(%r10,isigns); \
PROJ(base); \
MAYBEPERM(PERMUTE_DIR,perm); \
} else { \
LOAD_CHI(base); \
} \
base = st.GetInfo(ptype,local,perm,NxtDir,ent,plocal); ent++; \
PREFETCH_CHIMU(base); \
MULT_2SPIN_DIR_PF(Dir,basep); \
LOAD64(%r10,isigns); \
RECON; \
#define INTERIOR_BLOCK(a,b,PERMUTE_DIR,PROJMEM,RECON) \
LOAD64(%r10,isigns); \
PROJMEM(base); \
MAYBEPERM(PERMUTE_DIR,perm);
#define EXTERIOR_BLOCK(a,b,RECON) \
LOAD_CHI(base);
#define COMMON_BLOCK(a,b,RECON) \
base = st.GetInfo(ptype,local,perm,b,ent,plocal); ent++; \
PREFETCH_CHIMU(base); \
MULT_2SPIN_DIR_PF(a,basep); \
LOAD64(%r10,isigns); \
RECON;
#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
PF_GAUGE(Xp); \
PREFETCH1_CHIMU(base); \
ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON)
#define RESULT(base,basep) SAVE_RESULT(base,basep);
@ -67,62 +69,62 @@
////////////////////////////////////////////////////////////////////////////////
#ifdef INTERIOR
#define COMMON_BLOCK(a,b,RECON)
#define ZERO_NMU(A)
#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
basep = st.GetPFInfo(nent,plocal); nent++; \
if ( local ) { \
LOAD64(%r10,isigns); \
PROJ(base); \
MAYBEPERM(PERMUTE_DIR,perm); \
}else if ( st.same_node[Dir] ) {LOAD_CHI(base);} \
if ( local || st.same_node[Dir] ) { \
MULT_2SPIN_DIR_PF(Dir,basep); \
LOAD64(%r10,isigns); \
RECON; \
} \
base = st.GetInfo(ptype,local,perm,NxtDir,ent,plocal); ent++; \
PREFETCH_CHIMU(base); \
// No accumulate for DIR0
#define EXTERIOR_BLOCK_XP(a,b,RECON) \
ZERO_PSI; \
base = st.GetInfo(ptype,local,perm,b,ent,plocal); ent++;
#define EXTERIOR_BLOCK(a,b,RECON) \
base = st.GetInfo(ptype,local,perm,b,ent,plocal); ent++;
#define INTERIOR_BLOCK_XP(a,b,PERMUTE_DIR,PROJMEM,RECON) INTERIOR_BLOCK(a,b,PERMUTE_DIR,PROJMEM,RECON)
#define INTERIOR_BLOCK(a,b,PERMUTE_DIR,PROJMEM,RECON) \
LOAD64(%r10,isigns); \
PROJMEM(base); \
MAYBEPERM(PERMUTE_DIR,perm); \
base = st.GetInfo(ptype,local,perm,b,ent,plocal); ent++; \
PREFETCH_CHIMU(base); \
MULT_2SPIN_DIR_PF(a,basep); \
LOAD64(%r10,isigns); \
RECON;
#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
PF_GAUGE(Xp); \
PREFETCH1_CHIMU(base); \
{ ZERO_PSI; } \
ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON)
#define RESULT(base,basep) SAVE_RESULT(base,basep);
#endif
////////////////////////////////////////////////////////////////////////////////
// Post comms kernel
////////////////////////////////////////////////////////////////////////////////
#ifdef EXTERIOR
#define ZERO_NMU(A) nmu=0;
#define INTERIOR_BLOCK_XP(a,b,PERMUTE_DIR,PROJMEM,RECON) \
ZERO_PSI; base = st.GetInfo(ptype,local,perm,b,ent,plocal); ent++;
#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
if((!local)&&(!st.same_node[Dir]) ) { \
LOAD_CHI(base); \
MULT_2SPIN_DIR_PF(Dir,base); \
LOAD64(%r10,isigns); \
RECON; \
nmu++; \
}
#define EXTERIOR_BLOCK_XP(a,b,RECON) EXTERIOR_BLOCK(a,b,RECON)
#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
nmu=0; \
{ ZERO_PSI;} \
base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
if((!local)&&(!st.same_node[Dir]) ) { \
LOAD_CHI(base); \
MULT_2SPIN_DIR_PF(Dir,base); \
LOAD64(%r10,isigns); \
RECON; \
nmu++; \
}
#define INTERIOR_BLOCK(a,b,PERMUTE_DIR,PROJMEM,RECON) \
base = st.GetInfo(ptype,local,perm,b,ent,plocal); ent++;
#define EXTERIOR_BLOCK(a,b,RECON) \
nmu++; \
LOAD_CHI(base); \
MULT_2SPIN_DIR_PF(a,base); \
base = st.GetInfo(ptype,local,perm,b,ent,plocal); ent++; \
LOAD64(%r10,isigns); \
RECON;
#define COMMON_BLOCK(a,b,RECON)
#define RESULT(base,basep) if (nmu){ ADD_RESULT(base,base);}
#define RESULT(base,basep) if (nmu){ ADD_RESULT(base,base);}
#endif
{
int nmu;
int local,perm, ptype;
@ -134,11 +136,15 @@
MASK_REGS;
int nmax=U._grid->oSites();
for(int site=0;site<Ns;site++) {
#ifndef EXTERIOR
int sU =lo.Reorder(ssU);
int ssn=ssU+1; if(ssn>=nmax) ssn=0;
int sUn=lo.Reorder(ssn);
#ifndef EXTERIOR
LOCK_GAUGE(0);
#else
int sU =ssU;
int ssn=ssU+1; if(ssn>=nmax) ssn=0;
int sUn=ssn;
#endif
for(int s=0;s<Ls;s++) {
ss =sU*Ls+s;
@ -146,93 +152,20 @@
int ent=ss*8;// 2*Ndim
int nent=ssn*8;
ZERO_NMU(0);
base = st.GetInfo(ptype,local,perm,Xp,ent,plocal); ent++;
#ifndef EXTERIOR
PF_GAUGE(Xp);
PREFETCH1_CHIMU(base);
#endif
////////////////////////////////
// Xp
////////////////////////////////
basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
INTERIOR_BLOCK_XP(Xp,Yp,PERMUTE_DIR3,DIR0_PROJMEM,DIR0_RECON);
} else {
EXTERIOR_BLOCK_XP(Xp,Yp,DIR0_RECON);
}
COMMON_BLOCK(Xp,Yp,DIR0_RECON);
////////////////////////////////
// Yp
////////////////////////////////
basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
INTERIOR_BLOCK(Yp,Zp,PERMUTE_DIR2,DIR1_PROJMEM,DIR1_RECON);
} else {
EXTERIOR_BLOCK(Yp,Zp,DIR1_RECON);
}
COMMON_BLOCK(Yp,Zp,DIR1_RECON);
////////////////////////////////
// Zp
////////////////////////////////
basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
INTERIOR_BLOCK(Zp,Tp,PERMUTE_DIR1,DIR2_PROJMEM,DIR2_RECON);
} else {
EXTERIOR_BLOCK(Zp,Tp,DIR2_RECON);
}
COMMON_BLOCK(Zp,Tp,DIR2_RECON);
////////////////////////////////
// Tp
////////////////////////////////
basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
INTERIOR_BLOCK(Tp,Xm,PERMUTE_DIR0,DIR3_PROJMEM,DIR3_RECON);
} else {
EXTERIOR_BLOCK(Tp,Xm,DIR3_RECON);
}
COMMON_BLOCK(Tp,Xm,DIR3_RECON);
////////////////////////////////
// Xm
////////////////////////////////
// basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
INTERIOR_BLOCK(Xm,Ym,PERMUTE_DIR3,DIR4_PROJMEM,DIR4_RECON);
} else {
EXTERIOR_BLOCK(Xm,Ym,DIR4_RECON);
}
COMMON_BLOCK(Xm,Ym,DIR4_RECON);
////////////////////////////////
// Ym
////////////////////////////////
basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
INTERIOR_BLOCK(Ym,Zm,PERMUTE_DIR2,DIR5_PROJMEM,DIR5_RECON);
} else {
EXTERIOR_BLOCK(Ym,Zm,DIR5_RECON);
}
COMMON_BLOCK(Ym,Zm,DIR5_RECON);
////////////////////////////////
// Zm
////////////////////////////////
basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
INTERIOR_BLOCK(Zm,Tm,PERMUTE_DIR1,DIR6_PROJMEM,DIR6_RECON);
} else {
EXTERIOR_BLOCK(Zm,Tm,DIR6_RECON);
}
COMMON_BLOCK(Zm,Tm,DIR6_RECON);
////////////////////////////////
// Tm
////////////////////////////////
basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
INTERIOR_BLOCK(Tm,Xp,PERMUTE_DIR0,DIR7_PROJMEM,DIR7_RECON);
} else {
EXTERIOR_BLOCK(Tm,Xp,DIR7_RECON);
}
COMMON_BLOCK(Tm,Xp,DIR7_RECON);
ASM_LEG_XP(Xp,Yp,PERMUTE_DIR3,DIR0_PROJMEM,DIR0_RECON);
ASM_LEG(Yp,Zp,PERMUTE_DIR2,DIR1_PROJMEM,DIR1_RECON);
ASM_LEG(Zp,Tp,PERMUTE_DIR1,DIR2_PROJMEM,DIR2_RECON);
ASM_LEG(Tp,Xm,PERMUTE_DIR0,DIR3_PROJMEM,DIR3_RECON);
ASM_LEG(Xm,Ym,PERMUTE_DIR3,DIR4_PROJMEM,DIR4_RECON);
ASM_LEG(Ym,Zm,PERMUTE_DIR2,DIR5_PROJMEM,DIR5_RECON);
ASM_LEG(Zm,Tm,PERMUTE_DIR1,DIR6_PROJMEM,DIR6_RECON);
ASM_LEG(Tm,Xp,PERMUTE_DIR0,DIR7_PROJMEM,DIR7_RECON);
#ifdef EXTERIOR
if (nmu==0) break;
// if (nmu!=0) std::cout << "EXT "<<sU<<std::endl;
#endif
base = (uint64_t) &out._odata[ss];
basep= st.GetPFInfo(nent,plocal); nent++;
RESULT(base,basep);
@ -258,10 +191,6 @@
#undef DIR5_RECON
#undef DIR6_RECON
#undef DIR7_RECON
#undef EXTERIOR_BLOCK
#undef INTERIOR_BLOCK
#undef EXTERIOR_BLOCK_XP
#undef INTERIOR_BLOCK_XP
#undef COMMON_BLOCK
#undef ZERO_NMU
#undef ASM_LEG
#undef ASM_LEG_XP
#undef RESULT

View File

@ -31,7 +31,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define REGISTER
#define LOAD_CHIMU \
const SiteSpinor & ref (in._odata[offset]); \
{const SiteSpinor & ref (in._odata[offset]); \
Chimu_00=ref()(0)(0);\
Chimu_01=ref()(0)(1);\
Chimu_02=ref()(0)(2);\
@ -43,20 +43,20 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
Chimu_22=ref()(2)(2);\
Chimu_30=ref()(3)(0);\
Chimu_31=ref()(3)(1);\
Chimu_32=ref()(3)(2);
Chimu_32=ref()(3)(2);}
#define LOAD_CHI\
const SiteHalfSpinor &ref(buf[offset]); \
{const SiteHalfSpinor &ref(buf[offset]); \
Chi_00 = ref()(0)(0);\
Chi_01 = ref()(0)(1);\
Chi_02 = ref()(0)(2);\
Chi_10 = ref()(1)(0);\
Chi_11 = ref()(1)(1);\
Chi_12 = ref()(1)(2);
Chi_12 = ref()(1)(2);}
// To splat or not to splat depends on the implementation
#define MULT_2SPIN(A)\
auto & ref(U._odata[sU](A)); \
{auto & ref(U._odata[sU](A)); \
Impl::loadLinkElement(U_00,ref()(0,0)); \
Impl::loadLinkElement(U_10,ref()(1,0)); \
Impl::loadLinkElement(U_20,ref()(2,0)); \
@ -83,7 +83,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
UChi_01+= U_10*Chi_02;\
UChi_11+= U_10*Chi_12;\
UChi_02+= U_20*Chi_02;\
UChi_12+= U_20*Chi_12;
UChi_12+= U_20*Chi_12;}
#define PERMUTE_DIR(dir) \
@ -307,55 +307,132 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
result_31-= UChi_11; \
result_32-= UChi_12;
namespace Grid {
namespace QCD {
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON) \
SE=st.GetEntry(ptype,DIR,ss); \
offset = SE->_offset; \
local = SE->_is_local; \
perm = SE->_permute; \
if ( local ) { \
LOAD_CHIMU; \
PROJ; \
if ( perm) { \
PERMUTE_DIR(PERM); \
} \
} else { \
LOAD_CHI; \
} \
MULT_2SPIN(DIR); \
RECON;
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON) \
SE=st.GetEntry(ptype,DIR,ss); \
offset = SE->_offset; \
local = SE->_is_local; \
perm = SE->_permute; \
if ( local ) { \
LOAD_CHIMU; \
PROJ; \
if ( perm) { \
PERMUTE_DIR(PERM); \
} \
} else if ( st.same_node[DIR] ) { \
LOAD_CHI; \
} \
if (local || st.same_node[DIR] ) { \
MULT_2SPIN(DIR); \
RECON; \
}
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON) \
SE=st.GetEntry(ptype,DIR,ss); \
offset = SE->_offset; \
if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
LOAD_CHI; \
MULT_2SPIN(DIR); \
RECON; \
nmu++; \
}
#define HAND_RESULT(ss) \
{ \
SiteSpinor & ref (out._odata[ss]); \
vstream(ref()(0)(0),result_00); \
vstream(ref()(0)(1),result_01); \
vstream(ref()(0)(2),result_02); \
vstream(ref()(1)(0),result_10); \
vstream(ref()(1)(1),result_11); \
vstream(ref()(1)(2),result_12); \
vstream(ref()(2)(0),result_20); \
vstream(ref()(2)(1),result_21); \
vstream(ref()(2)(2),result_22); \
vstream(ref()(3)(0),result_30); \
vstream(ref()(3)(1),result_31); \
vstream(ref()(3)(2),result_32); \
}
#define HAND_RESULT_EXT(ss) \
if (nmu){ \
SiteSpinor & ref (out._odata[ss]); \
ref()(0)(0)+=result_00; \
ref()(0)(1)+=result_01; \
ref()(0)(2)+=result_02; \
ref()(1)(0)+=result_10; \
ref()(1)(1)+=result_11; \
ref()(1)(2)+=result_12; \
ref()(2)(0)+=result_20; \
ref()(2)(1)+=result_21; \
ref()(2)(2)+=result_22; \
ref()(3)(0)+=result_30; \
ref()(3)(1)+=result_31; \
ref()(3)(2)+=result_32; \
}
template<class Impl> void
WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out,int interior,int exterior)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
#define HAND_DECLARATIONS(a) \
Simd result_00; \
Simd result_01; \
Simd result_02; \
Simd result_10; \
Simd result_11; \
Simd result_12; \
Simd result_20; \
Simd result_21; \
Simd result_22; \
Simd result_30; \
Simd result_31; \
Simd result_32; \
Simd Chi_00; \
Simd Chi_01; \
Simd Chi_02; \
Simd Chi_10; \
Simd Chi_11; \
Simd Chi_12; \
Simd UChi_00; \
Simd UChi_01; \
Simd UChi_02; \
Simd UChi_10; \
Simd UChi_11; \
Simd UChi_12; \
Simd U_00; \
Simd U_10; \
Simd U_20; \
Simd U_01; \
Simd U_11; \
Simd U_21;
REGISTER Simd result_00; // 12 regs on knc
REGISTER Simd result_01;
REGISTER Simd result_02;
REGISTER Simd result_10;
REGISTER Simd result_11;
REGISTER Simd result_12;
REGISTER Simd result_20;
REGISTER Simd result_21;
REGISTER Simd result_22;
REGISTER Simd result_30;
REGISTER Simd result_31;
REGISTER Simd result_32; // 20 left
REGISTER Simd Chi_00; // two spinor; 6 regs
REGISTER Simd Chi_01;
REGISTER Simd Chi_02;
REGISTER Simd Chi_10;
REGISTER Simd Chi_11;
REGISTER Simd Chi_12; // 14 left
REGISTER Simd UChi_00; // two spinor; 6 regs
REGISTER Simd UChi_01;
REGISTER Simd UChi_02;
REGISTER Simd UChi_10;
REGISTER Simd UChi_11;
REGISTER Simd UChi_12; // 8 left
REGISTER Simd U_00; // two rows of U matrix
REGISTER Simd U_10;
REGISTER Simd U_20;
REGISTER Simd U_01;
REGISTER Simd U_11;
REGISTER Simd U_21; // 2 reg left.
#define ZERO_RESULT \
result_00=zero; \
result_01=zero; \
result_02=zero; \
result_10=zero; \
result_11=zero; \
result_12=zero; \
result_20=zero; \
result_21=zero; \
result_22=zero; \
result_30=zero; \
result_31=zero; \
result_32=zero;
#define Chimu_00 Chi_00
#define Chimu_01 Chi_01
@ -370,475 +447,225 @@ WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge
#define Chimu_31 UChi_11
#define Chimu_32 UChi_12
namespace Grid {
namespace QCD {
template<class Impl> void
WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
int offset,local,perm, ptype;
StencilEntry *SE;
// Xp
SE=st.GetEntry(ptype,Xp,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
XM_PROJ;
if ( perm) {
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI;
}
{
MULT_2SPIN(Xp);
}
XM_RECON;
// Yp
SE=st.GetEntry(ptype,Yp,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
YM_PROJ;
if ( perm) {
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI;
}
{
MULT_2SPIN(Yp);
}
YM_RECON_ACCUM;
// Zp
SE=st.GetEntry(ptype,Zp,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
ZM_PROJ;
if ( perm) {
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI;
}
{
MULT_2SPIN(Zp);
}
ZM_RECON_ACCUM;
// Tp
SE=st.GetEntry(ptype,Tp,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
TM_PROJ;
if ( perm) {
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI;
}
{
MULT_2SPIN(Tp);
}
TM_RECON_ACCUM;
// Xm
SE=st.GetEntry(ptype,Xm,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
XP_PROJ;
if ( perm) {
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI;
}
{
MULT_2SPIN(Xm);
}
XP_RECON_ACCUM;
// Ym
SE=st.GetEntry(ptype,Ym,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
YP_PROJ;
if ( perm) {
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI;
}
{
MULT_2SPIN(Ym);
}
YP_RECON_ACCUM;
// Zm
SE=st.GetEntry(ptype,Zm,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
ZP_PROJ;
if ( perm) {
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI;
}
{
MULT_2SPIN(Zm);
}
ZP_RECON_ACCUM;
// Tm
SE=st.GetEntry(ptype,Tm,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
TP_PROJ;
if ( perm) {
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI;
}
{
MULT_2SPIN(Tm);
}
TP_RECON_ACCUM;
{
SiteSpinor & ref (out._odata[ss]);
vstream(ref()(0)(0),result_00);
vstream(ref()(0)(1),result_01);
vstream(ref()(0)(2),result_02);
vstream(ref()(1)(0),result_10);
vstream(ref()(1)(1),result_11);
vstream(ref()(1)(2),result_12);
vstream(ref()(2)(0),result_20);
vstream(ref()(2)(1),result_21);
vstream(ref()(2)(2),result_22);
vstream(ref()(3)(0),result_30);
vstream(ref()(3)(1),result_31);
vstream(ref()(3)(2),result_32);
}
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON);
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM);
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM);
HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM);
HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM);
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM);
HAND_RESULT(ss);
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out,int interior,int exterior)
int ss,int sU,const FermionField &in, FermionField &out)
{
// std::cout << "Hand op Dhop "<<std::endl;
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
REGISTER Simd result_00; // 12 regs on knc
REGISTER Simd result_01;
REGISTER Simd result_02;
REGISTER Simd result_10;
REGISTER Simd result_11;
REGISTER Simd result_12;
REGISTER Simd result_20;
REGISTER Simd result_21;
REGISTER Simd result_22;
REGISTER Simd result_30;
REGISTER Simd result_31;
REGISTER Simd result_32; // 20 left
REGISTER Simd Chi_00; // two spinor; 6 regs
REGISTER Simd Chi_01;
REGISTER Simd Chi_02;
REGISTER Simd Chi_10;
REGISTER Simd Chi_11;
REGISTER Simd Chi_12; // 14 left
REGISTER Simd UChi_00; // two spinor; 6 regs
REGISTER Simd UChi_01;
REGISTER Simd UChi_02;
REGISTER Simd UChi_10;
REGISTER Simd UChi_11;
REGISTER Simd UChi_12; // 8 left
REGISTER Simd U_00; // two rows of U matrix
REGISTER Simd U_10;
REGISTER Simd U_20;
REGISTER Simd U_01;
REGISTER Simd U_11;
REGISTER Simd U_21; // 2 reg left.
#define Chimu_00 Chi_00
#define Chimu_01 Chi_01
#define Chimu_02 Chi_02
#define Chimu_10 Chi_10
#define Chimu_11 Chi_11
#define Chimu_12 Chi_12
#define Chimu_20 UChi_00
#define Chimu_21 UChi_01
#define Chimu_22 UChi_02
#define Chimu_30 UChi_10
#define Chimu_31 UChi_11
#define Chimu_32 UChi_12
HAND_DECLARATIONS(ignore);
StencilEntry *SE;
int offset,local,perm, ptype;
// Xp
SE=st.GetEntry(ptype,Xp,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
XP_PROJ;
if ( perm) {
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI;
}
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON);
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM);
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM);
HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM);
HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM);
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM);
HAND_RESULT(ss);
}
{
MULT_2SPIN(Xp);
}
XP_RECON;
template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
// Yp
SE=st.GetEntry(ptype,Yp,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
YP_PROJ;
if ( perm) {
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI;
}
{
MULT_2SPIN(Yp);
}
YP_RECON_ACCUM;
HAND_DECLARATIONS(ignore);
int offset,local,perm, ptype;
StencilEntry *SE;
ZERO_RESULT;
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
HAND_RESULT(ss);
}
// Zp
SE=st.GetEntry(ptype,Zp,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
ZP_PROJ;
if ( perm) {
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI;
}
{
MULT_2SPIN(Zp);
}
ZP_RECON_ACCUM;
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
// Tp
SE=st.GetEntry(ptype,Tp,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
TP_PROJ;
if ( perm) {
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI;
}
{
MULT_2SPIN(Tp);
}
TP_RECON_ACCUM;
// Xm
SE=st.GetEntry(ptype,Xm,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
XM_PROJ;
if ( perm) {
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI;
}
{
MULT_2SPIN(Xm);
}
XM_RECON_ACCUM;
// Ym
SE=st.GetEntry(ptype,Ym,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
YM_PROJ;
if ( perm) {
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI;
}
{
MULT_2SPIN(Ym);
}
YM_RECON_ACCUM;
HAND_DECLARATIONS(ignore);
// Zm
SE=st.GetEntry(ptype,Zm,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
StencilEntry *SE;
int offset,local,perm, ptype;
ZERO_RESULT;
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
HAND_RESULT(ss);
}
if ( local ) {
LOAD_CHIMU;
ZM_PROJ;
if ( perm) {
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI;
}
{
MULT_2SPIN(Zm);
}
ZM_RECON_ACCUM;
template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
// Tm
SE=st.GetEntry(ptype,Tm,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
HAND_DECLARATIONS(ignore);
if ( local ) {
LOAD_CHIMU;
TM_PROJ;
if ( perm) {
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
} else {
LOAD_CHI;
}
{
MULT_2SPIN(Tm);
}
TM_RECON_ACCUM;
int offset,local,perm, ptype;
StencilEntry *SE;
int nmu=0;
ZERO_RESULT;
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
HAND_RESULT_EXT(ss);
}
{
SiteSpinor & ref (out._odata[ss]);
vstream(ref()(0)(0),result_00);
vstream(ref()(0)(1),result_01);
vstream(ref()(0)(2),result_02);
vstream(ref()(1)(0),result_10);
vstream(ref()(1)(1),result_11);
vstream(ref()(1)(2),result_12);
vstream(ref()(2)(0),result_20);
vstream(ref()(2)(1),result_21);
vstream(ref()(2)(2),result_22);
vstream(ref()(3)(0),result_30);
vstream(ref()(3)(1),result_31);
vstream(ref()(3)(2),result_32);
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
StencilEntry *SE;
int offset,local,perm, ptype;
int nmu=0;
ZERO_RESULT;
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
HAND_RESULT_EXT(ss);
}
////////////////////////////////////////////////
// Specialise Gparity to simple implementation
////////////////////////////////////////////////
template<> void
WilsonKernels<GparityWilsonImplF>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
SiteHalfSpinor *buf,
int sF,int sU,const FermionField &in, FermionField &out,int internal,int external)
{
assert(0);
}
template<> void
WilsonKernels<GparityWilsonImplF>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
SiteHalfSpinor *buf,
int sF,int sU,const FermionField &in, FermionField &out,int internal,int external)
{
assert(0);
}
template<> void
WilsonKernels<GparityWilsonImplD>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int sF,int sU,const FermionField &in, FermionField &out,int internal,int external)
{
assert(0);
}
template<> void
WilsonKernels<GparityWilsonImplD>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int sF,int sU,const FermionField &in, FermionField &out,int internal,int external)
{
assert(0);
}
#define HAND_SPECIALISE_EMPTY(IMPL) \
template<> void \
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st, \
LebesgueOrder &lo, \
DoubledGaugeField &U, \
SiteHalfSpinor *buf, \
int sF,int sU, \
const FermionField &in, \
FermionField &out){ assert(0); } \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st, \
LebesgueOrder &lo, \
DoubledGaugeField &U, \
SiteHalfSpinor *buf, \
int sF,int sU, \
const FermionField &in, \
FermionField &out){ assert(0); } \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st, \
LebesgueOrder &lo, \
DoubledGaugeField &U, \
SiteHalfSpinor *buf, \
int sF,int sU, \
const FermionField &in, \
FermionField &out){ assert(0); } \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st, \
LebesgueOrder &lo, \
DoubledGaugeField &U, \
SiteHalfSpinor *buf, \
int sF,int sU, \
const FermionField &in, \
FermionField &out){ assert(0); } \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st, \
LebesgueOrder &lo, \
DoubledGaugeField &U, \
SiteHalfSpinor *buf, \
int sF,int sU, \
const FermionField &in, \
FermionField &out){ assert(0); } \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st, \
LebesgueOrder &lo, \
DoubledGaugeField &U, \
SiteHalfSpinor *buf, \
int sF,int sU, \
const FermionField &in, \
FermionField &out){ assert(0); } \
HAND_SPECIALISE_EMPTY(GparityWilsonImplF);
HAND_SPECIALISE_EMPTY(GparityWilsonImplD);
HAND_SPECIALISE_EMPTY(GparityWilsonImplFH);
HAND_SPECIALISE_EMPTY(GparityWilsonImplDF);
////////////// Wilson ; uses this implementation /////////////////////
// Need Nc=3 though //
#define INSTANTIATE_THEM(A) \
template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out,int interior,int exterior); \
template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out,int interior,int exterior);
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out);
INSTANTIATE_THEM(WilsonImplF);
INSTANTIATE_THEM(WilsonImplD);
@ -850,5 +677,15 @@ INSTANTIATE_THEM(DomainWallVec5dImplF);
INSTANTIATE_THEM(DomainWallVec5dImplD);
INSTANTIATE_THEM(ZDomainWallVec5dImplF);
INSTANTIATE_THEM(ZDomainWallVec5dImplD);
INSTANTIATE_THEM(WilsonImplFH);
INSTANTIATE_THEM(WilsonImplDF);
INSTANTIATE_THEM(ZWilsonImplFH);
INSTANTIATE_THEM(ZWilsonImplDF);
INSTANTIATE_THEM(GparityWilsonImplFH);
INSTANTIATE_THEM(GparityWilsonImplDF);
INSTANTIATE_THEM(DomainWallVec5dImplFH);
INSTANTIATE_THEM(DomainWallVec5dImplDF);
INSTANTIATE_THEM(ZDomainWallVec5dImplFH);
INSTANTIATE_THEM(ZDomainWallVec5dImplDF);
}}

View File

@ -29,7 +29,7 @@ directory
#ifndef GRID_QCD_GAUGE_H
#define GRID_QCD_GAUGE_H
#include <Grid/qcd/action/gauge/GaugeImpl.h>
#include <Grid/qcd/action/gauge/GaugeImplementations.h>
#include <Grid/qcd/utils/WilsonLoops.h>
#include <Grid/qcd/action/gauge/WilsonGaugeAction.h>
#include <Grid/qcd/action/gauge/PlaqPlusRectangleAction.h>

View File

@ -0,0 +1,142 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/gauge/GaugeImpl.h
Copyright (C) 2015
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_GAUGE_IMPL_TYPES_H
#define GRID_GAUGE_IMPL_TYPES_H
namespace Grid {
namespace QCD {
////////////////////////////////////////////////////////////////////////
// Implementation dependent gauge types
////////////////////////////////////////////////////////////////////////
#define INHERIT_GIMPL_TYPES(GImpl) \
typedef typename GImpl::Simd Simd; \
typedef typename GImpl::LinkField GaugeLinkField; \
typedef typename GImpl::Field GaugeField; \
typedef typename GImpl::SiteField SiteGaugeField; \
typedef typename GImpl::SiteLink SiteGaugeLink;
#define INHERIT_FIELD_TYPES(Impl) \
typedef typename Impl::Simd Simd; \
typedef typename Impl::SiteField SiteField; \
typedef typename Impl::Field Field;
// hardcodes the exponential approximation in the template
template <class S, int Nrepresentation = Nc, int Nexp = 12 > class GaugeImplTypes {
public:
typedef S Simd;
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation>>>;
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation>>, Nd>;
typedef iImplGaugeLink<Simd> SiteLink;
typedef iImplGaugeField<Simd> SiteField;
typedef Lattice<SiteLink> LinkField;
typedef Lattice<SiteField> Field;
// Guido: we can probably separate the types from the HMC functions
// this will create 2 kind of implementations
// probably confusing the users
// Now keeping only one class
// Move this elsewhere? FIXME
static inline void AddLink(Field &U, LinkField &W,
int mu) { // U[mu] += W
PARALLEL_FOR_LOOP
for (auto ss = 0; ss < U._grid->oSites(); ss++) {
U._odata[ss]._internal[mu] =
U._odata[ss]._internal[mu] + W._odata[ss]._internal;
}
}
///////////////////////////////////////////////////////////
// Move these to another class
// HMC auxiliary functions
static inline void generate_momenta(Field &P, GridParallelRNG &pRNG) {
// specific for SU gauge fields
LinkField Pmu(P._grid);
Pmu = zero;
for (int mu = 0; mu < Nd; mu++) {
SU<Nrepresentation>::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu);
PokeIndex<LorentzIndex>(P, Pmu, mu);
}
}
static inline Field projectForce(Field &P) { return Ta(P); }
static inline void update_field(Field& P, Field& U, double ep){
for (int mu = 0; mu < Nd; mu++) {
auto Umu = PeekIndex<LorentzIndex>(U, mu);
auto Pmu = PeekIndex<LorentzIndex>(P, mu);
Umu = expMat(Pmu, ep, Nexp) * Umu;
PokeIndex<LorentzIndex>(U, ProjectOnGroup(Umu), mu);
}
}
static inline RealD FieldSquareNorm(Field& U){
LatticeComplex Hloc(U._grid);
Hloc = zero;
for (int mu = 0; mu < Nd; mu++) {
auto Umu = PeekIndex<LorentzIndex>(U, mu);
Hloc += trace(Umu * Umu);
}
Complex Hsum = sum(Hloc);
return Hsum.real();
}
static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) {
SU<Nc>::HotConfiguration(pRNG, U);
}
static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) {
SU<Nc>::TepidConfiguration(pRNG, U);
}
static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) {
SU<Nc>::ColdConfiguration(pRNG, U);
}
};
typedef GaugeImplTypes<vComplex, Nc> GimplTypesR;
typedef GaugeImplTypes<vComplexF, Nc> GimplTypesF;
typedef GaugeImplTypes<vComplexD, Nc> GimplTypesD;
typedef GaugeImplTypes<vComplex, SU<Nc>::AdjointDimension> GimplAdjointTypesR;
typedef GaugeImplTypes<vComplexF, SU<Nc>::AdjointDimension> GimplAdjointTypesF;
typedef GaugeImplTypes<vComplexD, SU<Nc>::AdjointDimension> GimplAdjointTypesD;
} // QCD
} // Grid
#endif // GRID_GAUGE_IMPL_TYPES_H

View File

@ -2,7 +2,7 @@
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/gauge/GaugeImpl.h
Source file: ./lib/qcd/action/gauge/GaugeImplementations.h
Copyright (C) 2015
@ -26,53 +26,14 @@ See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_GAUGE_IMPL_H
#define GRID_QCD_GAUGE_IMPL_H
#ifndef GRID_QCD_GAUGE_IMPLEMENTATIONS_H
#define GRID_QCD_GAUGE_IMPLEMENTATIONS_H
#include "GaugeImplTypes.h"
namespace Grid {
namespace QCD {
////////////////////////////////////////////////////////////////////////
// Implementation dependent gauge types
////////////////////////////////////////////////////////////////////////
template <class Gimpl> class WilsonLoops;
#define INHERIT_GIMPL_TYPES(GImpl) \
typedef typename GImpl::Simd Simd; \
typedef typename GImpl::GaugeLinkField GaugeLinkField; \
typedef typename GImpl::GaugeField GaugeField; \
typedef typename GImpl::SiteGaugeField SiteGaugeField; \
typedef typename GImpl::SiteGaugeLink SiteGaugeLink;
//
template <class S, int Nrepresentation = Nc> class GaugeImplTypes {
public:
typedef S Simd;
template <typename vtype>
using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation>>>;
template <typename vtype>
using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation>>, Nd>;
typedef iImplGaugeLink<Simd> SiteGaugeLink;
typedef iImplGaugeField<Simd> SiteGaugeField;
typedef Lattice<SiteGaugeLink> GaugeLinkField; // bit ugly naming; polarised
// gauge field, lorentz... all
// ugly
typedef Lattice<SiteGaugeField> GaugeField;
// Move this elsewhere? FIXME
static inline void AddGaugeLink(GaugeField &U, GaugeLinkField &W,
int mu) { // U[mu] += W
parallel_for (auto ss = 0; ss < U._grid->oSites(); ss++) {
U._odata[ss]._internal[mu] =
U._odata[ss]._internal[mu] + W._odata[ss]._internal;
}
}
};
// Composition with smeared link, bc's etc.. probably need multiple inheritance
// Variable precision "S" and variable Nc
template <class GimplTypes> class PeriodicGaugeImpl : public GimplTypes {
@ -168,14 +129,6 @@ public:
static inline bool isPeriodicGaugeField(void) { return false; }
};
typedef GaugeImplTypes<vComplex, Nc> GimplTypesR;
typedef GaugeImplTypes<vComplexF, Nc> GimplTypesF;
typedef GaugeImplTypes<vComplexD, Nc> GimplTypesD;
typedef GaugeImplTypes<vComplex, SU<Nc>::AdjointDimension> GimplAdjointTypesR;
typedef GaugeImplTypes<vComplexF, SU<Nc>::AdjointDimension> GimplAdjointTypesF;
typedef GaugeImplTypes<vComplexD, SU<Nc>::AdjointDimension> GimplAdjointTypesD;
typedef PeriodicGaugeImpl<GimplTypesR> PeriodicGimplR; // Real.. whichever prec
typedef PeriodicGaugeImpl<GimplTypesF> PeriodicGimplF; // Float
typedef PeriodicGaugeImpl<GimplTypesD> PeriodicGimplD; // Double
@ -187,6 +140,8 @@ typedef PeriodicGaugeImpl<GimplAdjointTypesD> PeriodicGimplAdjD; // Double
typedef ConjugateGaugeImpl<GimplTypesR> ConjugateGimplR; // Real.. whichever prec
typedef ConjugateGaugeImpl<GimplTypesF> ConjugateGimplF; // Float
typedef ConjugateGaugeImpl<GimplTypesD> ConjugateGimplD; // Double
}
}

View File

@ -47,9 +47,19 @@ namespace Grid{
public:
PlaqPlusRectangleAction(RealD b,RealD c): c_plaq(b),c_rect(c){};
virtual std::string action_name(){return "PlaqPlusRectangleAction";}
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {}; // noop as no pseudoferms
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "["<<action_name() <<"] c_plaq: " << c_plaq << std::endl;
sstream << GridLogMessage << "["<<action_name() <<"] c_rect: " << c_rect << std::endl;
return sstream.str();
}
virtual RealD S(const GaugeField &U) {
RealD vol = U._grid->gSites();
@ -108,32 +118,32 @@ namespace Grid{
class RBCGaugeAction : public PlaqPlusRectangleAction<Gimpl> {
public:
INHERIT_GIMPL_TYPES(Gimpl);
RBCGaugeAction(RealD beta,RealD c1) : PlaqPlusRectangleAction<Gimpl>(beta*(1.0-8.0*c1), beta*c1) {
};
RBCGaugeAction(RealD beta,RealD c1) : PlaqPlusRectangleAction<Gimpl>(beta*(1.0-8.0*c1), beta*c1) {};
virtual std::string action_name(){return "RBCGaugeAction";}
};
template<class Gimpl>
class IwasakiGaugeAction : public RBCGaugeAction<Gimpl> {
public:
INHERIT_GIMPL_TYPES(Gimpl);
IwasakiGaugeAction(RealD beta) : RBCGaugeAction<Gimpl>(beta,-0.331) {
};
IwasakiGaugeAction(RealD beta) : RBCGaugeAction<Gimpl>(beta,-0.331) {};
virtual std::string action_name(){return "IwasakiGaugeAction";}
};
template<class Gimpl>
class SymanzikGaugeAction : public RBCGaugeAction<Gimpl> {
public:
INHERIT_GIMPL_TYPES(Gimpl);
SymanzikGaugeAction(RealD beta) : RBCGaugeAction<Gimpl>(beta,-1.0/12.0) {
};
SymanzikGaugeAction(RealD beta) : RBCGaugeAction<Gimpl>(beta,-1.0/12.0) {};
virtual std::string action_name(){return "SymanzikGaugeAction";}
};
template<class Gimpl>
class DBW2GaugeAction : public RBCGaugeAction<Gimpl> {
public:
INHERIT_GIMPL_TYPES(Gimpl);
DBW2GaugeAction(RealD beta) : RBCGaugeAction<Gimpl>(beta,-1.4067) {
};
DBW2GaugeAction(RealD beta) : RBCGaugeAction<Gimpl>(beta,-1.4067) {};
virtual std::string action_name(){return "DBW2GaugeAction";}
};
}

Some files were not shown because too many files have changed in this diff Show More