1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-10-26 09:39:34 +00:00

Compare commits

...

321 Commits

Author SHA1 Message Date
meifeng
7a7f823f00 Merge 37d1d87c3c into 73c0b29535 2024-02-16 15:15:35 -05:00
73c0b29535 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2024-02-13 20:19:32 +00:00
303b83cdb8 Scaling benchmarks, verbosity and MPICH aware in acceleratorInit()
For some reason Dirichlet benchmark fails on several nodes; need to
debug this.
2024-02-13 19:48:03 +00:00
5ef4da3f29 Silence verbose 2024-02-13 19:47:36 +00:00
1502860004 Benchmark scripts 2024-02-13 19:47:02 +00:00
585efc6f3f More benchmark scripts 2024-02-13 19:40:49 +00:00
62055e04dd missing semicolon generates error with some compilers 2024-02-13 18:18:27 +01:00
7019916294 RNG seed change safer for large volumes; this is a long term solution 2024-02-07 00:56:39 +00:00
91cf5ee312 Updated bench script 2024-02-06 23:45:10 +00:00
5bfa88be85 Aurora MPI standalone benchmake and options that work well 2024-02-06 16:28:40 +00:00
2a0d75bac2 Aurora files 2023-12-21 23:20:17 +00:00
Meifeng Lin
37d1d87c3c bug fix for Intel GPUs 2023-12-19 08:03:28 -06:00
Meifeng Lin
1381dbc8ef Revert back to Grid develop version since new LLVM compilers now do not require static loop count variables. 2023-12-15 08:59:18 -05:00
Meifeng Lin
cc5ab624a2 Merge branch 'feature/omp-offload' of github.com:BNL-HPC/Grid into feature/omp-offload 2023-12-14 15:33:52 -05:00
meifeng
72641211cd Merge branch 'paboyle:develop' into feature/omp-offload 2023-12-14 15:31:39 -05:00
meifeng
505cc6927b Merge pull request #6 from atif4461/omp-offload-develop
Omp offload develop
2023-12-14 15:30:12 -05:00
Peter Boyle
f48298ad4e Bug fix 2023-12-11 20:57:02 -05:00
root
645e47c1ba Config for Ampere Altra ARM 2023-12-08 16:17:56 -05:00
Peter Boyle
d1d9827263 Integrator logging update 2023-12-08 12:14:00 -05:00
Mohammad Atif
f516acda5f fixed conflicts; su3 working 2023-12-04 17:20:17 -05:00
Mohammad Atif
7a7aa61d52 cleaned up 2023-12-04 16:37:28 -05:00
Peter Boyle
14643c0aab SDCC benchmarking scripts for A100 nodes and IceLake nodes (AVX512) 2023-12-04 15:45:57 -05:00
Mohammad Atif
867abeaf8e removed print flags 2023-12-04 15:12:03 -05:00
Peter Boyle
b77a9b8947 SDDC compiles starting 2023-11-30 14:31:51 -05:00
Peter Boyle
7d077fe493 Frontier compiel 2023-11-09 13:58:44 -05:00
Peter Boyle
51051df62c 3GeV run setup 2023-10-16 20:49:52 +03:00
Peter Boyle
33097681b9 FTHMC compiled and merged to develop 2023-10-14 00:42:55 +03:00
Peter Boyle
07e4900218 FTHMC commit 2023-10-13 18:21:57 +03:00
Peter Boyle
36ab567d67 FTHMC 3 Gev 2023-10-13 18:21:57 +03:00
Peter Boyle
e19171523b FTHMC Status at lattice conference commit 2023-10-13 18:21:56 +03:00
Peter Boyle
9626a2c7c0 Asynch handling 2023-10-13 18:21:56 +03:00
Peter Boyle
e936f5b80b IfGridTensor shorthand 2023-10-13 18:21:56 +03:00
Peter Boyle
ffc0639cb9 Running in HMC tests 2023-10-13 18:21:56 +03:00
Peter Boyle
c5b43b322c traceProduct eliminates non-contributing intermediate terms 2023-10-13 18:21:56 +03:00
Peter Boyle
c9c4576237 Improved frontier cshift 2023-10-13 18:21:56 +03:00
Peter Boyle
6d0c2de399 Deprecate teh PVC directory and make a PVC-OEM generic PVC target with
no queueing system dependency -- just interactive scripts
2023-10-03 17:04:20 +00:00
Peter Boyle
7786ea9921 Bug fix in script 2023-10-03 09:58:44 -07:00
Peter Boyle
d93eac7b1c Performance regressed and is OK in icpx 2023.2 2023-10-03 15:53:14 +00:00
Peter Boyle
afc316f501 Rename headers 2023-10-02 16:25:11 -04:00
Peter Boyle
f14bfd5c1b Relocate sub includes 2023-10-02 16:23:38 -04:00
Peter Boyle
c5f1420dea Merge remote-tracking branch 'LupoA/develop' into LupoA-develop 2023-10-02 16:22:35 -04:00
Peter Boyle
018e6da872 Merge pull request #440 from giltirn/feature/paddedcellgauge
Feature/paddedcellgauge
2023-10-02 10:00:42 -04:00
Peter Boyle
b77bccfac2 Merge pull request #444 from mmphys/feature/docX
Update doc complete list of Macports needed to build Grid on a fresh Mac
2023-10-02 09:57:11 -04:00
Peter Boyle
80359e0d49 Bland SYCL compile 2023-09-26 13:20:27 -07:00
Peter Boyle
3d437c5cc4 Making SYCL happy 2023-09-26 13:19:42 -07:00
atif4461
e5bc51779a edited readme amd stack err 2023-09-08 21:31:08 -04:00
Mohammad Atif
157368ed04 Merge branch 'omp-offload-develop' of https://github.com/atif4461/Grid into omp-offload-develop 2023-08-27 11:02:03 -04:00
Mohammad Atif
ec2ddda12c included pragma map in Lattice_reduction.h 2023-08-27 11:00:56 -04:00
atif4461
5a5c481d45 added objdump files 2023-08-27 01:53:12 -04:00
atif4461
59dade8346 added steps to reproduce amd omp gpu bug 2023-08-27 00:50:58 -04:00
Peter Boyle
b8a7004365 Partial fraction test 2023-08-14 15:17:03 -04:00
Michael Marshall
bd56c95a6f Update documentation with complete list of Macports needed to build Grid on a fresh Mac 2023-07-14 13:50:06 +01:00
Peter Boyle
994512048e Merge pull request #439 from felixerben/bugfix/IRL_convergence
Bugfix/irl convergence
2023-07-12 16:32:26 -04:00
chillenzer
dbd8bb49dc Merge pull request #32 from LupoA/sp2n/develop
Sp2n/develop
2023-07-04 15:23:43 +00:00
Julian Lenz
3a29af0ce4 Fixed linker error 2023-07-04 16:08:44 +01:00
Julian Lenz
f7b79cdd45 Added test for ProjectSpn 2023-07-03 18:00:32 +01:00
Mohammad Atif
1bda8c47fa fixed conflicts after merging pabyle develop 2023-07-03 12:35:37 -04:00
Mohammad Atif
2100cc6497 fixed conflicts after merging pabyle develop 2023-07-03 11:46:37 -04:00
Mohammad Atif
ef8af7bff8 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-07-03 11:20:07 -04:00
Mohammad Atif
cb277ae516 added file line traces 2023-07-03 11:18:13 -04:00
Alessandro Lupo
075b9d22d0 adjoint rep implemented as 2indx symmetric 2023-07-02 13:58:31 +01:00
Alessandro Lupo
b92428f05f better test 2023-07-02 13:34:03 +01:00
Alessandro Lupo
34b11864b6 prettiest tests 2023-07-02 13:25:57 +01:00
Christopher Kelly
1dfaa08afb The stencils for the staple and rect-staple padded cell implementations are now created and stored by workspace classes that allow for reuse providing the grids remain consistent
The workspaces are now used by the plaq+rectangle gauge action resulting in a further 2x performance improvement as measured on a 16^4 local volume for 2 nodes (16 ranks) of Crusher
2023-06-28 15:11:24 -04:00
Christopher Kelly
f44dce390f Implemented acclerator-optimized versions of localCopyRegion and insertSliceLocal to speed up padding
Fixed const correctness on PaddedCell methods
Fixed compile issues on Crusher
Added timing breakdowns for PaddedCell::Expand and the padded implementations of the staples, visible under --log Performance
Optimized kernel for StaplePadded
Test_iwasaki_action_newstaple now repeats the calculation 10 times and reports average timings
2023-06-27 14:58:10 -04:00
Christopher Kelly
bb71e9a96a Added PaddedCell and GeneralisedLocalStencil header includes to standard base headers
Moved versions of the padded-cell implementations of staple and rect-staple from test code to WilsonLoops header
Added StapleAndRectStapleAll which is now called by the plaq+rectangle action class. Under the hood it uses the padded cell implementations with maximal reuse of the padded gauge links
2023-06-27 11:23:30 -04:00
78bae9417c returning Nstop vectors even if not all meet true convergence criterion 2023-06-27 14:38:19 +01:00
dd170ead01 whitespace 2023-06-27 11:37:01 +01:00
014704856f do one more iteration if not all vectors converged 2023-06-27 11:33:30 +01:00
Christopher Kelly
6f6844ccf1 Added new StapleAll and RectStapleAll functions that return the staples for all mu as an array
Modified plaq+rectangle gauge actions to use the above
Added a test code to confirm the above changes
2023-06-26 15:48:47 -04:00
Christopher Kelly
4c6613d72c Modified RectStapleDouble and RectStapleOptimised to use Gauge-BC respecting CshiftLink
Added test code tests/debug/Test_optimized_staple_gaugebc demonstrating equivalence of above to RectStapleUnoptimised for cconj gauge BCs
Removed optimized staple only being used for periodic gauge BCs; it is now always used
2023-06-26 10:20:23 -04:00
Peter Boyle
ee92e08edb Merge pull request #435 from fjosw/fix/warnings_in_WilsonKernelsImplementation
Unused variable in WilsonKernelsImplementation
2023-06-23 11:47:19 -04:00
Peter Boyle
c1dcee9328 Merge pull request #437 from fjosw/fix/stencil_debug
Added GridLogDebug to BuildSurfaceList debug message
2023-06-23 11:47:00 -04:00
Alessandro Lupo
559257bbe9 better documentation and filelist names 2023-06-23 16:16:48 +01:00
Peter Boyle
6b150961fe Better script 2023-06-23 18:09:25 +03:00
Alessandro Lupo
cff1f8d3b8 rm unused variables and formatting 2023-06-23 16:04:18 +01:00
Alessandro Lupo
f27d2083cd adjustments in SUn and Sp2n impl 2023-06-23 15:34:08 +01:00
Christopher Kelly
36cc9c524f Threaded the constructor of GeneralLocalStencil 2023-06-23 09:57:38 -04:00
Alessandro Lupo
2822487450 rm unncessary line 2023-06-23 14:55:23 +01:00
Alessandro Lupo
e07fafe46a minor adjustments to twoindex 2023-06-23 12:18:04 +01:00
Alessandro Lupo
063d290bd8 missing function 2023-06-23 11:11:20 +01:00
Alessandro Lupo
4e6194d92a Avoid code duplication in ProjectSUn 2023-06-23 11:03:50 +01:00
Alessandro Lupo
de30c4e22a minor improvements 2023-06-23 10:49:41 +01:00
Peter Boyle
5bafcaedfa Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-06-22 19:59:45 +03:00
Peter Boyle
bfeceae708 FTHMC 2023-06-22 12:58:18 -04:00
Peter Boyle
eacb66591f Config command 2023-06-22 19:56:40 +03:00
Peter Boyle
fadaa85626 Update 2023-06-22 19:56:27 +03:00
Peter Boyle
02a5b0d786 Updating run during testing 2023-06-22 19:52:46 +03:00
Peter Boyle
0e2141442a Dennis says broken 2023-06-22 19:19:51 +03:00
Peter Boyle
769eb0eecb Precision coverage 2023-06-22 19:19:20 +03:00
Christopher Kelly
4241c7d4a3 Imported coalescedReadGeneralPermute GPU implementation from Christoph
Fixed bug in padded staple code where extract was being called on the result before the GPU view was closed
Fixed compile issue with pointer cast in padded staple code
Added timing summaries of padded staple code and timing breakdown of staple implementation to Test_padded_cell_staple
2023-06-21 16:01:01 -04:00
Christopher Kelly
7b11075102 The user can now specify the implementation of Cshift used by the PaddedCell class through a virtual base class API. Implementations for default (regular Cshift) and for gauge links (which respects the gauge BCs)
Fixed const-correctness for PaddedCell and ConjugateGimpl::setDirections
Modified test code for padded-cell implementation of staple, rect-staple to use cconj BCs
2023-06-20 17:09:56 -04:00
Christopher Kelly
abc658dca5 Added coalescedReadGeneralPermute CPU implementation based on Christoph's GPT code
In a test code, implemented a padded-cell version of the staple and rectangular-staple calculation
2023-06-20 16:14:25 -04:00
Alessandro Lupo
2372275b2c Merge pull request #36 from LupoA/sp2n/gpu-bugfix
Sp2n/gpu bugfix [close #30]
2023-06-20 13:46:00 +01:00
chillenzer
ef736e8aa4 Merge pull request #35 from LupoA/sp2n/enableSp
consistent enable sp config flag
2023-06-20 10:41:09 +00:00
Julian Lenz
5e539e2d54 Forgot some follow-ups on changed signature 2023-06-18 12:37:51 +01:00
Julian Lenz
96773f5254 Apparently forgot to remove one Lattice version 2023-06-18 12:21:39 +01:00
Alessandro Lupo
d80df09f3b consistent enable sp config flag 2023-06-16 19:16:46 +01:00
Julian Lenz
621e612c30 Fix non-zero ret on device bug 2023-06-16 16:27:49 +01:00
Julian Lenz
8c3792721b ClangFormat 2023-06-16 15:58:23 +01:00
Julian Lenz
c95bbd3948 Remove accelerated lattice version 2023-06-16 15:50:26 +01:00
Julian Lenz
e28ab7a732 Re-included instantiations for symmetric 2Index AS Sp 2023-06-16 14:20:37 +01:00
Alessandro Lupo
c797cbe737 deal with post-merge trauma 2023-06-16 14:20:37 +01:00
Alessandro Lupo
e09dfbf1c2 definetely the right merge upstream/develop 2023-06-16 14:19:46 +01:00
85e35c4da1 fix: added GridLogDebug to BuildSurfaceList debug message. 2023-06-16 10:31:16 +01:00
Peter Boyle
d72e914cf0 Profiling temporary code until optimised 2023-06-15 10:43:04 -04:00
Peter Boyle
3b5254e2d5 Optional checkpoint smeared configs for FTHMC 2023-06-15 10:43:04 -04:00
Peter Boyle
f1c358b596 Additional tests 2023-06-15 10:43:04 -04:00
Peter Boyle
c0ef210265 Hot start should be properly Hot 2023-06-15 10:43:04 -04:00
Peter Boyle
e3e1cc1962 Ta project 2023-06-15 10:43:04 -04:00
Peter Boyle
723eadbb5c Keep methods virtual 2023-06-15 10:43:04 -04:00
Peter Boyle
e24637ec1e Clean up 2023-06-15 10:43:04 -04:00
Peter Boyle
8b01ff4ce7 Integrator over to smeared force structure 2023-06-15 10:43:04 -04:00
Peter Boyle
588197c487 Smeared action virtual class 2023-06-15 10:43:04 -04:00
Julian Lenz
116d90b0ee First attempt on #30 2023-06-15 15:09:37 +01:00
Julian Lenz
b0646ca187 Remove some unused variables 2023-06-15 15:09:09 +01:00
Peter Boyle
1352bad2e4 Sunspot compile 2023-06-15 11:22:46 +00:00
chillenzer
4895ff260e Merge pull request #28 from LupoA/sp2n/config
compile sp2n fermion impl only if declared at config time
2023-06-09 13:07:48 +00:00
Alessandro Lupo
470d93006a compile sp2n fermion impl only if declared at config time 2023-06-07 12:53:33 +01:00
chillenzer
2f3d03f188 Merge pull request #27 from LupoA/sp2n/documentation
documentation for gaugegroup and sp2n
2023-06-01 16:42:27 +00:00
Alessandro Lupo
8db7c23bee improve documentation 2023-06-01 17:39:10 +01:00
chillenzer
69dc5172dc Merge pull request #26 from LupoA/sp2n/irreps
Sp2n/irreps
2023-06-01 16:28:15 +00:00
Julian Lenz
fd72eb6546 Merge branch 'sp2n/algorithm' into sp2n/irreps 2023-06-01 17:24:01 +01:00
Peter Boyle
ffd7301649 Updated masked / fthmc smeared config container 2023-06-01 06:23:02 -04:00
Peter Boyle
d2a8494044 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-06-01 06:22:33 -04:00
Peter Boyle
0982e0d19b Jacobian action wrapper for FTHMC 2023-06-01 06:15:08 -04:00
Peter Boyle
3badbfc3c1 Refactor the Action and Smeared gauge configuration containers. Add first pass at FTHMC action 2023-06-01 06:14:28 -04:00
Peter Boyle
5465961e30 New test for FTHMC portion 2023-06-01 06:14:04 -04:00
477b794bc5 fix: unused variable removed. 2023-05-29 14:08:53 +01:00
Peter Boyle
4835fd1a87 HIP stream synch 2023-05-27 17:58:22 +03:00
Peter Boyle
6533c25814 Lumi 2023-05-27 16:13:32 +03:00
Alessandro Lupo
b405767569 make private methods private 2023-05-26 17:02:16 +01:00
Alessandro Lupo
fe88a0c12f cleaner twoindex class, cleaner tests 2023-05-26 16:55:30 +01:00
Alessandro Lupo
e61a9ed2b4 partial revert 2023-05-26 13:54:26 +01:00
Alessandro Lupo
de8daa3824 group is SUn by default 2023-05-26 13:44:41 +01:00
Alessandro Lupo
3a50fb29cb directly call sp helper 2023-05-26 13:28:47 +01:00
Alessandro Lupo
6647d2656f rm unnecessary specialisation 2023-05-26 12:27:22 +01:00
Alessandro Lupo
a6f4dbeb6d remove redundant template parameter 2023-05-26 12:13:40 +01:00
Alessandro Lupo
92a282f2d8 Merge pull request #24 from LupoA/sp2n/fix_static_assert_symmetric
Move static_assert inside of function
2023-05-26 11:13:50 +01:00
Alessandro Lupo
ca2fd9fc7b documentation for gaugegroup and sp2n 2023-05-25 18:40:54 +01:00
Alessandro Lupo
be1a4f5860 implement TwoIndexSymm for sp2n 2023-05-22 17:21:03 +01:00
Peter Boyle
1b2914ec09 FT-HMC smearing, derivative chain rule, log det and force first pass. 2023-05-22 10:21:37 -04:00
Peter Boyle
519f795066 Header not liked by gcc on mac? puzzling 2023-05-22 10:21:12 -04:00
Alessandro Lupo
5897b93dd4 debug tests, fix dimension 2023-05-22 13:42:21 +01:00
Alessandro Lupo
af091e0881 DimensionHelper for 2index irreps 2023-05-21 16:56:06 +01:00
Alessandro Lupo
3c1e5e9517 Merge pull request #25 from LupoA/sp2n/unify_representations
Sp2n/unify representations [close #3]
2023-05-21 14:55:27 +01:00
Alessandro Lupo
85b2cb7a8a changing some hardcoded SUn lines 2023-05-21 14:50:28 +01:00
Peter Boyle
4240ad5ca8 Preparing for FTHMC 2023-05-19 21:21:55 -04:00
Peter Boyle
d418347d86 public for convenience to see rho params 2023-05-19 21:21:05 -04:00
Peter Boyle
29a4bfe5e5 Clean up 2023-05-19 21:20:45 -04:00
Peter Boyle
9955bf9daf Regresses to Qlat 2023-05-19 17:32:13 -04:00
Julian Lenz
b8bdc2eefb Unified two index representations 2023-05-18 18:36:29 +01:00
Julian Lenz
0078826ff1 Move static_assert inside of function 2023-05-18 18:14:53 +01:00
Julian Lenz
e855c41772 Unified spfundamental.h with fundamental.h 2023-05-18 18:11:20 +01:00
chillenzer
d169c275b6 Merge pull request #22 from LupoA/sp2n/unify_twoindex
Unify TwoIndex
2023-05-18 14:55:02 +00:00
Julian Lenz
a5125e23f4 Typo 2023-05-18 15:41:35 +01:00
Julian Lenz
7b83c80757 Merge branch 'sp2n/unify_twoindex' of github.com:LupoA/Grid into sp2n/unify_twoindex 2023-05-18 15:36:14 +01:00
Julian Lenz
e41821e206 Disable two index symmetric 2023-05-18 15:29:55 +01:00
Alessandro Lupo
5a75ab15a2 typo in 2S dim 2023-05-17 20:47:57 +01:00
Alessandro Lupo
932c783fbf 2AS for every Nc! 2023-05-17 20:22:05 +01:00
Julian Lenz
55f9cce577 Revert "Added automated HMC test for Nc=4"
This reverts commit eee27b8b30.
2023-05-17 09:17:48 +01:00
Alessandro Lupo
b3533ca847 correct tests (failing) 2023-05-16 17:43:52 +01:00
Alessandro Lupo
fd2a637010 test 2index 2023-05-16 14:10:39 +01:00
Julian Lenz
eee27b8b30 Added automated HMC test for Nc=4 2023-05-15 18:37:33 +01:00
Julian Lenz
8522352aa3 ClangFormat 2023-05-15 18:36:05 +01:00
Alessandro Lupo
3beb8f4091 fixing typo, getting pre-changes physics 2023-05-15 16:00:15 +01:00
Alessandro Lupo
12a706e9b1 de-hardcode the number of generators 2023-05-15 15:48:21 +01:00
Alessandro Lupo
170aa7df01 fix (dimension to be improved) 2023-05-15 15:20:18 +01:00
Julian Lenz
e8ad1fef53 Unify TwoIndex 2023-05-12 14:35:50 +01:00
Peter Boyle
876c8f4478 Nodes on padded cell 2023-05-11 12:35:49 -04:00
Peter Boyle
9c8750f261 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-05-11 12:29:09 -04:00
Peter Boyle
91efd08179 Option for Qlat generator basis 2023-05-11 12:27:45 -04:00
Peter Boyle
9953511b65 Mac compile 2023-05-11 12:27:29 -04:00
Peter Boyle
025fa9991a For FTHMC 2023-05-11 12:26:14 -04:00
Peter Boyle
e8c60c355b Padded cell code 2023-05-11 12:25:50 -04:00
Peter Boyle
6c9c7f9d85 Permute fix 2023-05-11 12:24:21 -04:00
Peter Boyle
f534523ede Debug 2023-05-11 12:23:11 -04:00
Peter Boyle
1b8a834beb Debug 2023-05-11 12:22:24 -04:00
Alessandro Lupo
aa9df63a05 rename group projections based on determinants 2023-05-10 14:50:52 +01:00
Meifeng Lin
2b6b98be48 Merge branch 'feature/omp-offload' of github.com:BNL-HPC/Grid into feature/omp-offload 2023-05-05 14:37:30 -04:00
chillenzer
3953312a93 Merge pull request #20 from LupoA/sp2n/unify_gaugeimpltypes
Sp2n/unify gaugeimpltypes
2023-05-03 15:17:10 +00:00
Julian Lenz
6e62f4f616 ClangFormat 2023-05-03 16:15:12 +01:00
Julian Lenz
6a7bdca53b Take over additional algebra tests from Alessandro 2023-05-03 16:02:02 +01:00
Julian Lenz
c7fba9aace Take over additional group tests from Alessandro 2023-05-03 16:01:48 +01:00
Julian Lenz
ac6c7cb8d6 Merge in Alessandro's changes [test fails] 2023-05-03 02:53:03 +01:00
Julian Lenz
c5924833a1 ClangFormat 2023-05-03 02:39:36 +01:00
Julian Lenz
ac0a74be0d Taken care of algebra tests 2023-05-03 02:32:42 +01:00
Julian Lenz
42b0e1125d Naming and argument types 2023-05-03 01:51:46 +01:00
Julian Lenz
339c4fda79 Extracted is_element_of Sp2n 2023-05-02 15:44:34 +01:00
Alessandro Lupo
9b85bf9402 better projection test 2023-05-02 15:42:20 +01:00
Alessandro Lupo
86b02c3cd8 cleaning up requested by Julian 2023-05-02 13:31:17 +01:00
Alessandro Lupo
7b3b7093fa cleaning up requested by Ed 2023-05-02 12:50:57 +01:00
Alessandro Lupo
881b08a465 Correct implementation of SpTa 2023-04-27 18:17:06 +01:00
Julian Lenz
3ee5444c69 Remove commented out stuff 2023-04-21 08:08:18 +01:00
Julian Lenz
5e28fe56d2 Remove code duplication: Iterating through vectors 2023-04-21 08:08:06 +01:00
Peter Boyle
3aa43e6065 Debug info 2023-04-20 14:21:13 -04:00
Peter Boyle
78ac4044ff HMC 2023-04-20 13:28:07 -04:00
Peter Boyle
119c3db47f Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-04-18 15:13:16 -04:00
Peter Boyle
21bbdb8fc2 Crusher 2023-04-18 15:11:16 -04:00
Alessandro Lupo
5aabe074fe Rename Sympl* to Sp* 2023-04-18 11:50:20 +01:00
Peter Boyle
739bd7572c Example code 2023-04-17 21:51:55 +00:00
Peter Boyle
074627a5bd Pass file descriptors through AF_UNIX for level_zero 2023-04-17 21:50:52 +00:00
Peter Boyle
6a23b2c599 Drop UVM 2023-04-17 21:49:58 +00:00
Alessandro Lupo
dace904c10 fix typo 2023-04-14 18:06:18 +01:00
Alessandro Lupo
be98d26610 small change I missed in previous commit 2023-04-13 17:48:43 +01:00
Peter Boyle
bd891fb3f5 tests to compile 2023-04-12 18:32:44 -04:00
Peter Boyle
3984265851 Merge pull request #432 from paboyle/hotfix/nvcc-warnings
Unused statements generating warnings removed
2023-04-12 16:59:02 -04:00
Peter Boyle
45361d188f Merge pull request #427 from fjosw/feat/bug_report_issue_template
Feat/bug report issue template
2023-04-12 16:58:41 -04:00
Peter Boyle
80c9d77e02 Merge pull request #433 from paboyle/hotfix/virtual-dtor
Virtual destructor for LinearOperator
2023-04-12 16:56:18 -04:00
Peter Boyle
3aff64dddb Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-04-11 12:19:15 -07:00
Peter Boyle
b4f2ca81ff Copy queue and compute queue same as better concurrency 2023-04-11 12:18:21 -07:00
Peter Boyle
d1dea5f840 New driver 2023-04-11 12:16:52 -07:00
Peter Boyle
54f8b84d16 Fence 2023-04-11 12:16:08 -07:00
Peter Boyle
da503fef0e Name change on barrier routine 2023-04-11 12:14:04 -07:00
Peter Boyle
4a6802098a Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-04-07 15:43:28 -04:00
Peter Boyle
f9b41a84d2 Trajectory runs to completion on Crusher within wall clock time 2023-04-07 15:42:45 -04:00
5d7e0d18b9 virtual destructor for LinearOperator 2023-04-07 14:30:38 +01:00
9e64387933 mores unused statements removed 2023-04-07 14:27:18 +01:00
983b681d46 unused statement cleaning 2023-04-07 14:12:02 +01:00
4072408b6f Update README.md 2023-04-07 11:45:28 +01:00
bd76b47fbf Update CI badge in README 2023-04-07 11:44:48 +01:00
Alessandro Lupo
178376f24b minor stylistic changes 2023-04-06 12:08:17 +01:00
18ce23aa75 Fix NEON SIMD 2023-04-06 11:30:48 +01:00
meifeng
5b50eaa55f Merge pull request #5 from atif4461/omp-offload-develop
Pull omp offload develop branch
2023-04-05 13:32:48 -04:00
chillenzer
6a0eb466ee Merge pull request #19 from LupoA/refactoring_sp2n
refactoring sp2n
2023-04-05 10:50:58 +00:00
Peter Boyle
ffa7fe0cc2 Merge branch 'feature/dirichlet' into develop 2023-04-04 23:13:52 -04:00
Peter Boyle
6b979f0a69 Dirichlet improvements that I failed to commit 2023-04-04 23:13:17 -04:00
Alessandro Lupo
4ea29b8f0f Template group into GaugeImplTypes. Closing #2 2023-04-04 17:49:28 +01:00
Alessandro Lupo
778291230a expand ProjecOnGaugeGroup, change ProjectOnSp2nAlgebra into SpTa, fixing some of its issues 2023-04-04 17:48:13 +01:00
Mohammad Atif
3671ace5a1 added omp allocators and dev copies 2023-04-04 12:10:03 -04:00
Peter Boyle
86dac5ff4f Better printing 2023-04-04 07:42:19 -07:00
Peter Boyle
4a382fad3f Use distinct SYCL queue for copies 2023-04-04 07:41:41 -07:00
Peter Boyle
cc753670d9 Barrier elimination, surface list build 2023-04-04 07:39:14 -07:00
Peter Boyle
cc9d88ea1c Fence changes and EXT kernel loop cout reduction 2023-04-04 07:37:23 -07:00
Peter Boyle
b281b0166e Put the barrier in the subroutine 2023-04-04 07:36:03 -07:00
Peter Boyle
6a21f694ff Apply barrier in Gather kernel sequence.
Could place before comms, or in Gather, but decided to insist Gather means Gather is done
2023-04-04 07:33:24 -07:00
Peter Boyle
fc4db5e963 Merge branch 'feature/dirichlet' of https://github.com/paboyle/Grid into feature/dirichlet 2023-04-03 18:26:11 -04:00
Peter Boyle
6252ffaf76 No unified 2023-04-03 18:25:22 -04:00
Alessandro Lupo
026e736dfa Projection on algebra can now be templated. Fix #12 2023-04-03 16:31:19 +01:00
Alessandro Lupo
4275b3f431 Fix typo and remove unnecessary lines 2023-04-03 12:01:52 +01:00
Peter Boyle
af64c1c6b6 Had managed to drop the accelerator_barrier() in the Wilson Compressor gather 2023-03-30 17:34:44 -04:00
Peter Boyle
866f48391a Temporary fix for develop incorrect results 2023-03-30 17:10:13 -04:00
Peter Boyle
a4df527d74 Merge pull request #428 from mmphys/bugfix/comm_none
Fixes for --enable-comms=none
2023-03-30 08:38:14 -04:00
Michael Marshall
5764d21161 Fixes for --enable-comms=none 2023-03-30 10:15:28 +01:00
Peter Boyle
496d04cd85 Weaken the Fence 2023-03-29 18:58:51 -04:00
39214702f6 feat: indentation fixed. 2023-03-28 16:30:34 +02:00
3e4614c63a feat: draft for bug-report issue template added. 2023-03-28 16:24:35 +02:00
Alessandro Lupo
1b8176e2c0 fix code duplication 2023-03-17 14:58:00 +00:00
Alessandro Lupo
cbc053c3db Revert "projection on Sp2n algebra, to be used instead of Ta"
This reverts commit ba7f9d7b70.
2023-03-17 11:36:58 +00:00
Alessandro Lupo
cdf3f6ef6e Merge branch 'refactoring_sp2n' of https://github.com/LupoA/Grid into refactoring_sp2n 2023-03-15 15:59:50 +00:00
Alessandro Lupo
ba7f9d7b70 projection on Sp2n algebra, to be used instead of Ta 2023-03-15 15:55:12 +00:00
Alessandro Lupo
371fd123fb consequence of iSUnMatrix being no longer a member of the SU class 2023-03-14 10:47:07 +00:00
Alessandro Lupo
d6ff644aab Towards the day all tests compile 2023-03-14 10:43:25 +00:00
Julian Lenz
29586f6b5e Deactivate some tests for Nc!=3 2023-03-13 08:17:14 +00:00
Alessandro Lupo
fd057c838f add ProjectOnGaugeGroup and ProjectGn to allow future templating in GaugeImplTypes 2023-03-10 12:10:46 +00:00
Alessandro Lupo
f51222086c Move functions from GaugeGroup to group specific implementations 2023-03-09 16:22:20 +00:00
Alessandro Lupo
f73691ec47 Merge pull request #18 from nickforce989/sp2n/newbranch
Sp2n/newbranch
2023-02-13 10:22:27 +01:00
Peter Boyle
ccd21f96ff Plaquette agreeing and moving to final form (slowly) need to optimise 2023-02-01 22:57:44 -05:00
Peter Boyle
4b90cb8888 First cut passes combining padded cell with general stencil towards fast plaquette and staggered force 2023-02-01 22:14:10 -05:00
Niccolo Forzano
7ebda3e9ec Merge commit 'b10e1b7bc8bec809f874e9e48a3ccc7b2619c9d1' into sp2n/newbranch 2023-01-19 12:10:18 +00:00
Niccolo Forzano
b10e1b7bc8 Fixed files giving zero force computation on GPU, issue #8 2023-01-18 18:04:47 +00:00
Meifeng Lin
a9df27f18d Merge branch 'develop' of https://www.github.com/paboyle/Grid into feature/omp-offload 2023-01-10 14:52:14 -05:00
Meifeng Lin
26ad759469 bug fix in HOWTO 2022-12-20 10:33:24 -08:00
Meifeng Lin
ed723909a2 Update HOWTO with an example config-command 2022-12-20 10:20:14 -08:00
Meifeng Lin
36ffe79093 Add simple HOWTO instructions and module load script for Cori GPU 2022-12-20 10:10:27 -08:00
Meifeng Lin
1df8669898 change loop counts to local variables so clang compiler doesn't complain 2022-12-20 10:09:50 -08:00
Alessandro Lupo
d7dea44ce7 Merge pull request #17 from chillenzer/unify_gauge_groups
Fix compilation error in nvcc (closes #15)
2022-12-19 16:24:03 +00:00
Julian Lenz
37b6b82869 Fix file extensions 2022-12-18 16:12:56 +00:00
Julian Lenz
92ad5b8f74 Compiler error fix: NVCC requires names for templ. par. 2022-12-18 15:50:19 +00:00
Meifeng Lin
f6661ce29b Merged openmp offload implementation with develop 2022-12-13 18:32:55 -05:00
Meifeng Lin
9b3ac3c23f Added stdout for number of GPU threads; 2022-12-13 15:14:01 -08:00
Meifeng Lin
c33a3b3b40 Fixed --accelerator-threads input to omp target thread_limit() 2022-12-13 15:13:11 -08:00
meifeng
40ee605591 Merge pull request #1 from paboyle/develop
Update to Grid's main repo
2022-12-13 09:12:57 -05:00
Alessandro Lupo
8c80f1c168 Merge pull request #14 from chillenzer/unify_gauge_groups
Unify gauge groups (closes #5)
2022-12-01 17:35:46 +00:00
Julian Lenz
0af7d5a793 Rename Grid/qcd/utils/<Group>_impl.h -> Grid/qcd/utils/<Group>.h 2022-11-30 17:12:00 +00:00
Julian Lenz
505fa49983 Renamed SUn.h -> GaugeGroup.h 2022-11-30 17:09:48 +00:00
Julian Lenz
7bcf33def9 Removed Sp2n.h 2022-11-30 16:59:46 +00:00
Julian Lenz
a13820656a Removed iSUnMatrix, etc. 2022-11-30 15:09:03 +00:00
Julian Lenz
fa71b46a41 Hide nsp 2022-11-30 14:44:23 +00:00
Julian Lenz
b8b3ae6ac1 Make helper functions private 2022-11-30 13:29:14 +00:00
Julian Lenz
55c008da21 Removed forward declaration 2022-11-30 13:12:21 +00:00
Julian Lenz
2507606bd0 With function overloading (still dirty). 2022-11-30 12:54:36 +00:00
Julian Lenz
7c2ad4f8c8 Attempt with SFINAE (failed) 2022-11-30 11:57:39 +00:00
Julian Lenz
54c8025aad Remove unnecessary pwd in scripts/filelist 2022-11-28 17:50:38 +00:00
Julian Lenz
921e23e83c Separated out everything SU specific 2022-11-28 17:47:50 +00:00
Julian Lenz
6e750ecb0e Remove apparently forgotten file 2022-11-28 16:33:46 +00:00
Julian Lenz
b8f1f5d2a3 Introduce GaugeGroup 2022-11-25 17:45:32 +00:00
Julian Lenz
9273f2937c Autoformat google style 2022-11-25 17:44:08 +00:00
Julian Lenz
1aa28b47ae Add existing test to check 2022-11-25 17:40:40 +00:00
Julian Lenz
629cb2987a Fix typo in Makefile.am 2022-11-25 17:40:21 +00:00
Julian Lenz
03235d6368 Fixed type in configure.ac 2022-11-25 16:57:40 +00:00
Alessandro Lupo
22064c7e4c Fixing #11 2022-11-25 13:10:29 +00:00
Alessandro Lupo
2de03e5172 Revert "Revert "Fixing issue #11: consistent use of ncolour and nsp""
This reverts commit 3af4929dda.
2022-11-23 19:40:28 +00:00
Alessandro Lupo
3af4929dda Revert "Fixing issue #11: consistent use of ncolour and nsp"
This reverts commit 1ba429345b.
2022-11-23 19:34:59 +00:00
Alessandro Lupo
1ba429345b Fixing issue #11: consistent use of ncolour and nsp 2022-11-23 18:45:01 +00:00
Alessandro Lupo
88bdd4344b 2indx antisymm representation of sp2n 2021-11-04 18:27:35 +00:00
Alessandro Lupo
4044536eea add projection on sp2n algebra 2021-10-26 10:20:44 +01:00
Alessandro Lupo
4d8ae6221c fix projection 2021-10-22 10:44:54 +01:00
Alessandro Lupo
4e31e4e094 Better tests 2021-10-13 15:07:23 +01:00
Alessandro Lupo
0d6674e489 hot start for sp2n 2021-10-12 18:53:54 +01:00
Alessandro Lupo
b145fd4f5b necessary to merge 2021-10-12 17:08:46 +01:00
Alessandro Lupo
8a5b794f25 necessary change to merge with upstrm 2021-10-12 16:04:03 +01:00
Alessandro Lupo
291e80f88a sp2n as config option 2021-10-12 16:00:32 +01:00
Alessandro Lupo
1ace5850ae first hmc 2021-10-12 16:00:32 +01:00
Alessandro Lupo
283f14b7c1 fix sp2n projection 2021-10-12 16:00:32 +01:00
Alessandro Lupo
1d6e708083 tests! 2021-10-12 16:00:32 +01:00
Alessandro Lupo
89457e25e3 sp fermion instantiation 2021-10-12 16:00:32 +01:00
Alessandro Lupo
7e3b298d3d project on sp2n 2021-10-12 16:00:32 +01:00
Alessandro Lupo
7ff3e5eed4 gauge and fermion implementation for sp2n 2021-10-12 16:00:32 +01:00
Alessandro Lupo
19eb51cf41 sp2n generators 2021-10-12 15:53:33 +01:00
Alessandro Lupo
470d4dcc6d sp2n as config option 2021-10-12 15:47:56 +01:00
Alessandro Lupo
ed03bfd555 first hmc 2021-10-12 12:16:47 +01:00
Alessandro Lupo
8c0fbcccae fix sp2n projection 2021-10-12 12:12:16 +01:00
Alessandro Lupo
d4866157fe tests! 2021-10-12 09:06:15 +01:00
Alessandro Lupo
b6496b6cb5 sp fermion instantiation 2021-10-11 16:32:10 +01:00
Alessandro Lupo
4f5fe57920 project on sp2n 2021-10-11 16:28:15 +01:00
Alessandro Lupo
11fb943b1e gauge and fermion implementation for sp2n 2021-10-11 16:21:25 +01:00
Alessandro Lupo
046a23121e sp2n generators 2021-10-05 15:51:22 +01:00
Meifeng Lin
c2f8ba194e Working simple OpenMP offloading with cudaMallocManaged; cshift not working 2021-09-29 15:23:13 -07:00
Meifeng Lin
229ce57fef Added example config-command 2021-09-27 19:01:32 -04:00
Meifeng Lin
712b326e40 Added OpenMP target offloading support 2021-09-27 19:00:18 -04:00
215 changed files with 27227 additions and 2423 deletions

54
.github/ISSUE_TEMPLATE/bug-report.yml vendored Normal file
View File

@@ -0,0 +1,54 @@
name: Bug report
description: Report a bug.
title: "<insert title>"
labels: [bug]
body:
- type: markdown
attributes:
value: >
Thank you for taking the time to file a bug report.
Please check that the code is pointing to the HEAD of develop
or any commit in master which is tagged with a version number.
- type: textarea
attributes:
label: "Describe the issue:"
description: >
Describe the issue and any previous attempt to solve it.
validations:
required: true
- type: textarea
attributes:
label: "Code example:"
description: >
If relevant, show how to reproduce the issue using a minimal working
example.
placeholder: |
<< your code here >>
render: shell
validations:
required: false
- type: textarea
attributes:
label: "Target platform:"
description: >
Give a description of the target platform (CPU, network, compiler).
Please give the full CPU part description, using for example
`cat /proc/cpuinfo | grep 'model name' | uniq` (Linux)
or `sysctl machdep.cpu.brand_string` (macOS) and the full output
the `--version` option of your compiler.
validations:
required: true
- type: textarea
attributes:
label: "Configure options:"
description: >
Please give the exact configure command used and attach
`config.log`, `grid.config.summary` and the output of `make V=1`.
render: shell
validations:
required: true

View File

@@ -66,6 +66,10 @@ if BUILD_FERMION_REPS
extra_sources+=$(ADJ_FERMION_FILES)
extra_sources+=$(TWOIND_FERMION_FILES)
endif
if BUILD_SP
extra_sources+=$(SP_FERMION_FILES)
extra_sources+=$(SP_TWOIND_FERMION_FILES)
endif
lib_LIBRARIES = libGrid.a

View File

@@ -542,6 +542,7 @@ public:
(*this)(in[i], out[i]);
}
}
virtual ~LinearFunction(){};
};
template<class Field> class IdentityLinearFunction : public LinearFunction<Field> {

View File

@@ -166,16 +166,16 @@ public:
rsqf[s] =rsq[s];
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrecCleanup: shift "<< s <<" target resid "<<rsq[s]<<std::endl;
// ps_d[s] = src_d;
precisionChangeFast(ps_f[s],src_d);
precisionChange(ps_f[s],src_d);
}
// r and p for primary
p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys
r_d = p_d;
//MdagM+m[0]
precisionChangeFast(p_f,p_d);
precisionChange(p_f,p_d);
Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
precisionChangeFast(tmp_d,mmp_f);
precisionChange(tmp_d,mmp_f);
Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
tmp_d = tmp_d - mmp_d;
std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
@@ -204,7 +204,7 @@ public:
for(int s=0;s<nshift;s++) {
axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d);
precisionChangeFast(psi_f[s],psi_d[s]);
precisionChange(psi_f[s],psi_d[s]);
}
///////////////////////////////////////
@@ -225,7 +225,7 @@ public:
AXPYTimer.Stop();
PrecChangeTimer.Start();
precisionChangeFast(r_f, r_d);
precisionChange(r_f, r_d);
PrecChangeTimer.Stop();
AXPYTimer.Start();
@@ -243,13 +243,13 @@ public:
cp=c;
PrecChangeTimer.Start();
precisionChangeFast(p_f, p_d); //get back single prec search direction for linop
precisionChange(p_f, p_d); //get back single prec search direction for linop
PrecChangeTimer.Stop();
MatrixTimer.Start();
Linop_f.HermOp(p_f,mmp_f);
MatrixTimer.Stop();
PrecChangeTimer.Start();
precisionChangeFast(mmp_d, mmp_f); // From Float to Double
precisionChange(mmp_d, mmp_f); // From Float to Double
PrecChangeTimer.Stop();
d=real(innerProduct(p_d,mmp_d));
@@ -311,7 +311,7 @@ public:
SolverTimer.Stop();
for(int s=0;s<nshift;s++){
precisionChangeFast(psi_d[s],psi_f[s]);
precisionChange(psi_d[s],psi_f[s]);
}

View File

@@ -211,7 +211,7 @@ public:
Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
tmp_d = tmp_d - mmp_d;
std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
// assert(norm2(tmp_d)< 1.0e-4);
assert(norm2(tmp_d)< 1.0);
axpy(mmp_d,mass[0],p_d,mmp_d);
RealD rn = norm2(p_d);

View File

@@ -419,14 +419,15 @@ until convergence
}
}
if ( Nconv < Nstop )
if ( Nconv < Nstop ) {
std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl;
std::cout << GridLogIRL << "returning Nstop vectors, the last "<< Nstop-Nconv << "of which might meet convergence criterion only approximately" <<std::endl;
}
eval=eval2;
//Keep only converged
eval.resize(Nconv);// Nstop?
evec.resize(Nconv,grid);// Nstop?
eval.resize(Nstop);// was Nconv
evec.resize(Nstop,grid);// was Nconv
basisSortInPlace(evec,eval,reverse);
}

View File

@@ -222,6 +222,9 @@ void MemoryManager::InitMessage(void) {
#ifdef GRID_SYCL
std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_shared"<<std::endl;
#endif
#ifdef GRID_OMPTARGET
std::cout << GridLogMessage<< "MemoryManager::Init() Using OMPTARGET managed memory"<<std::endl;
#endif
#else
std::cout << GridLogMessage<< "MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory"<<std::endl;
#ifdef GRID_CUDA
@@ -233,6 +236,9 @@ void MemoryManager::InitMessage(void) {
#ifdef GRID_SYCL
std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_device"<<std::endl;
#endif
#ifdef GRID_OMPTARGET
std::cout << GridLogMessage<< "MemoryManager::Init() Using OMPTARGET omp_alloc_device"<<std::endl;
#endif
#endif
}

View File

@@ -519,7 +519,6 @@ void MemoryManager::Audit(std::string s)
uint64_t LruBytes1=0;
uint64_t LruBytes2=0;
uint64_t LruCnt=0;
uint64_t LockedBytes=0;
std::cout << " Memory Manager::Audit() from "<<s<<std::endl;
for(auto it=LRU.begin();it!=LRU.end();it++){

View File

@@ -128,7 +128,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
int recv_from_rank,int dor,
int xbytes,int rbytes, int dir)
{
return 2.0*bytes;
return xbytes+rbytes;
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
{

View File

@@ -91,6 +91,59 @@ void *SharedMemory::ShmBufferSelf(void)
//std::cerr << "ShmBufferSelf "<<ShmRank<<" "<<std::hex<< ShmCommBufs[ShmRank] <<std::dec<<std::endl;
return ShmCommBufs[ShmRank];
}
static inline int divides(int a,int b)
{
return ( b == ( (b/a)*a ) );
}
void GlobalSharedMemory::GetShmDims(const Coordinate &WorldDims,Coordinate &ShmDims)
{
////////////////////////////////////////////////////////////////
// Allow user to configure through environment variable
////////////////////////////////////////////////////////////////
char* str = getenv(("GRID_SHM_DIMS_" + std::to_string(ShmDims.size())).c_str());
if ( str ) {
std::vector<int> IntShmDims;
GridCmdOptionIntVector(std::string(str),IntShmDims);
assert(IntShmDims.size() == WorldDims.size());
long ShmSize = 1;
for (int dim=0;dim<WorldDims.size();dim++) {
ShmSize *= (ShmDims[dim] = IntShmDims[dim]);
assert(divides(ShmDims[dim],WorldDims[dim]));
}
assert(ShmSize == WorldShmSize);
return;
}
////////////////////////////////////////////////////////////////
// Powers of 2,3,5 only in prime decomposition for now
////////////////////////////////////////////////////////////////
int ndimension = WorldDims.size();
ShmDims=Coordinate(ndimension,1);
std::vector<int> primes({2,3,5});
int dim = 0;
int last_dim = ndimension - 1;
int AutoShmSize = 1;
while(AutoShmSize != WorldShmSize) {
int p;
for(p=0;p<primes.size();p++) {
int prime=primes[p];
if ( divides(prime,WorldDims[dim]/ShmDims[dim])
&& divides(prime,WorldShmSize/AutoShmSize) ) {
AutoShmSize*=prime;
ShmDims[dim]*=prime;
last_dim = dim;
break;
}
}
if (p == primes.size() && last_dim == dim) {
std::cerr << "GlobalSharedMemory::GetShmDims failed" << std::endl;
exit(EXIT_FAILURE);
}
dim=(dim+1) %ndimension;
}
}
NAMESPACE_END(Grid);

View File

@@ -27,9 +27,10 @@ Author: Christoph Lehner <christoph@lhnr.de>
*************************************************************************************/
/* END LEGAL */
#define Mheader "SharedMemoryMpi: "
#include <Grid/GridCore.h>
#include <pwd.h>
#include <syscall.h>
#ifdef GRID_CUDA
#include <cuda_runtime_api.h>
@@ -39,11 +40,118 @@ Author: Christoph Lehner <christoph@lhnr.de>
#endif
#ifdef GRID_SYCL
#define GRID_SYCL_LEVEL_ZERO_IPC
#include <syscall.h>
#define SHM_SOCKETS
#endif
#include <sys/socket.h>
#include <sys/un.h>
NAMESPACE_BEGIN(Grid);
#ifdef SHM_SOCKETS
/*
* Barbaric extra intranode communication route in case we need sockets to pass FDs
* Forced by level_zero not being nicely designed
*/
static int sock;
static const char *sock_path_fmt = "/tmp/GridUnixSocket.%d";
static char sock_path[256];
class UnixSockets {
public:
static void Open(int rank)
{
int errnum;
sock = socket(AF_UNIX, SOCK_DGRAM, 0); assert(sock>0);
struct sockaddr_un sa_un = { 0 };
sa_un.sun_family = AF_UNIX;
snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,rank);
unlink(sa_un.sun_path);
if (bind(sock, (struct sockaddr *)&sa_un, sizeof(sa_un))) {
perror("bind failure");
exit(EXIT_FAILURE);
}
}
static int RecvFileDescriptor(void)
{
int n;
int fd;
char buf[1];
struct iovec iov;
struct msghdr msg;
struct cmsghdr *cmsg;
char cms[CMSG_SPACE(sizeof(int))];
iov.iov_base = buf;
iov.iov_len = 1;
memset(&msg, 0, sizeof msg);
msg.msg_name = 0;
msg.msg_namelen = 0;
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_control = (caddr_t)cms;
msg.msg_controllen = sizeof cms;
if((n=recvmsg(sock, &msg, 0)) < 0) {
perror("recvmsg failed");
return -1;
}
if(n == 0){
perror("recvmsg returned 0");
return -1;
}
cmsg = CMSG_FIRSTHDR(&msg);
memmove(&fd, CMSG_DATA(cmsg), sizeof(int));
return fd;
}
static void SendFileDescriptor(int fildes,int xmit_to_rank)
{
struct msghdr msg;
struct iovec iov;
struct cmsghdr *cmsg = NULL;
char ctrl[CMSG_SPACE(sizeof(int))];
char data = ' ';
memset(&msg, 0, sizeof(struct msghdr));
memset(ctrl, 0, CMSG_SPACE(sizeof(int)));
iov.iov_base = &data;
iov.iov_len = sizeof(data);
sprintf(sock_path,sock_path_fmt,xmit_to_rank);
struct sockaddr_un sa_un = { 0 };
sa_un.sun_family = AF_UNIX;
snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,xmit_to_rank);
msg.msg_name = (void *)&sa_un;
msg.msg_namelen = sizeof(sa_un);
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_controllen = CMSG_SPACE(sizeof(int));
msg.msg_control = ctrl;
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
*((int *) CMSG_DATA(cmsg)) = fildes;
sendmsg(sock, &msg, 0);
};
};
#endif
NAMESPACE_BEGIN(Grid);
#define header "SharedMemoryMpi: "
/*Construct from an MPI communicator*/
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
{
@@ -66,8 +174,8 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
MPI_Comm_size(WorldShmComm ,&WorldShmSize);
if ( WorldRank == 0) {
std::cout << header " World communicator of size " <<WorldSize << std::endl;
std::cout << header " Node communicator of size " <<WorldShmSize << std::endl;
std::cout << Mheader " World communicator of size " <<WorldSize << std::endl;
std::cout << Mheader " Node communicator of size " <<WorldShmSize << std::endl;
}
// WorldShmComm, WorldShmSize, WorldShmRank
@@ -170,59 +278,7 @@ void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_M
if(nscan==3 && HPEhypercube ) OptimalCommunicatorHypercube(processors,optimal_comm,SHM);
else OptimalCommunicatorSharedMemory(processors,optimal_comm,SHM);
}
static inline int divides(int a,int b)
{
return ( b == ( (b/a)*a ) );
}
void GlobalSharedMemory::GetShmDims(const Coordinate &WorldDims,Coordinate &ShmDims)
{
////////////////////////////////////////////////////////////////
// Allow user to configure through environment variable
////////////////////////////////////////////////////////////////
char* str = getenv(("GRID_SHM_DIMS_" + std::to_string(ShmDims.size())).c_str());
if ( str ) {
std::vector<int> IntShmDims;
GridCmdOptionIntVector(std::string(str),IntShmDims);
assert(IntShmDims.size() == WorldDims.size());
long ShmSize = 1;
for (int dim=0;dim<WorldDims.size();dim++) {
ShmSize *= (ShmDims[dim] = IntShmDims[dim]);
assert(divides(ShmDims[dim],WorldDims[dim]));
}
assert(ShmSize == WorldShmSize);
return;
}
////////////////////////////////////////////////////////////////
// Powers of 2,3,5 only in prime decomposition for now
////////////////////////////////////////////////////////////////
int ndimension = WorldDims.size();
ShmDims=Coordinate(ndimension,1);
std::vector<int> primes({2,3,5});
int dim = 0;
int last_dim = ndimension - 1;
int AutoShmSize = 1;
while(AutoShmSize != WorldShmSize) {
int p;
for(p=0;p<primes.size();p++) {
int prime=primes[p];
if ( divides(prime,WorldDims[dim]/ShmDims[dim])
&& divides(prime,WorldShmSize/AutoShmSize) ) {
AutoShmSize*=prime;
ShmDims[dim]*=prime;
last_dim = dim;
break;
}
}
if (p == primes.size() && last_dim == dim) {
std::cerr << "GlobalSharedMemory::GetShmDims failed" << std::endl;
exit(EXIT_FAILURE);
}
dim=(dim+1) %ndimension;
}
}
void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &SHM)
{
////////////////////////////////////////////////////////////////
@@ -396,7 +452,7 @@ void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const Coordinate &proce
#ifdef GRID_MPI3_SHMGET
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << header "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
@@ -481,7 +537,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
exit(EXIT_FAILURE);
}
std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
std::cout << WorldRank << Mheader " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
<< "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
SharedMemoryZero(ShmCommBuf,bytes);
@@ -524,7 +580,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
exit(EXIT_FAILURE);
}
if ( WorldRank == 0 ){
std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
std::cout << WorldRank << Mheader " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
<< "bytes at "<< std::hex<< ShmCommBuf << " - "<<(bytes-1+(uint64_t)ShmCommBuf) <<std::dec<<" for comms buffers " <<std::endl;
}
SharedMemoryZero(ShmCommBuf,bytes);
@@ -532,8 +588,13 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Loop over ranks/gpu's on our node
///////////////////////////////////////////////////////////////////////////////////////////////////////////
#ifdef SHM_SOCKETS
UnixSockets::Open(WorldShmRank);
#endif
for(int r=0;r<WorldShmSize;r++){
MPI_Barrier(WorldShmComm);
#ifndef GRID_MPI3_SHM_NONE
//////////////////////////////////////////////////
// If it is me, pass around the IPC access key
@@ -541,24 +602,32 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
void * thisBuf = ShmCommBuf;
if(!Stencil_force_mpi) {
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
typedef struct { int fd; pid_t pid ; } clone_mem_t;
typedef struct { int fd; pid_t pid ; ze_ipc_mem_handle_t ze; } clone_mem_t;
auto zeDevice = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_device());
auto zeContext = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_context());
auto zeDevice = cl::sycl::get_native<cl::sycl::backend::ext_oneapi_level_zero>(theGridAccelerator->get_device());
auto zeContext = cl::sycl::get_native<cl::sycl::backend::ext_oneapi_level_zero>(theGridAccelerator->get_context());
ze_ipc_mem_handle_t ihandle;
clone_mem_t handle;
if ( r==WorldShmRank ) {
auto err = zeMemGetIpcHandle(zeContext,ShmCommBuf,&ihandle);
if ( err != ZE_RESULT_SUCCESS ) {
std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
std::cerr << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
exit(EXIT_FAILURE);
} else {
std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle succeeded for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
}
memcpy((void *)&handle.fd,(void *)&ihandle,sizeof(int));
handle.pid = getpid();
memcpy((void *)&handle.ze,(void *)&ihandle,sizeof(ihandle));
#ifdef SHM_SOCKETS
for(int rr=0;rr<WorldShmSize;rr++){
if(rr!=r){
UnixSockets::SendFileDescriptor(handle.fd,rr);
}
}
#endif
}
#endif
#ifdef GRID_CUDA
@@ -586,6 +655,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
// Share this IPC handle across the Shm Comm
//////////////////////////////////////////////////
{
MPI_Barrier(WorldShmComm);
int ierr=MPI_Bcast(&handle,
sizeof(handle),
MPI_BYTE,
@@ -601,6 +671,10 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
if ( r!=WorldShmRank ) {
thisBuf = nullptr;
int myfd;
#ifdef SHM_SOCKETS
myfd=UnixSockets::RecvFileDescriptor();
#else
std::cout<<"mapping seeking remote pid/fd "
<<handle.pid<<"/"
<<handle.fd<<std::endl;
@@ -608,16 +682,22 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
int pidfd = syscall(SYS_pidfd_open,handle.pid,0);
std::cout<<"Using IpcHandle pidfd "<<pidfd<<"\n";
// int myfd = syscall(SYS_pidfd_getfd,pidfd,handle.fd,0);
int myfd = syscall(438,pidfd,handle.fd,0);
std::cout<<"Using IpcHandle myfd "<<myfd<<"\n";
myfd = syscall(438,pidfd,handle.fd,0);
int err_t = errno;
if (myfd < 0) {
fprintf(stderr,"pidfd_getfd returned %d errno was %d\n", myfd,err_t); fflush(stderr);
perror("pidfd_getfd failed ");
assert(0);
}
#endif
std::cout<<"Using IpcHandle mapped remote pid "<<handle.pid <<" FD "<<handle.fd <<" to myfd "<<myfd<<"\n";
memcpy((void *)&ihandle,(void *)&handle.ze,sizeof(ihandle));
memcpy((void *)&ihandle,(void *)&myfd,sizeof(int));
auto err = zeMemOpenIpcHandle(zeContext,zeDevice,ihandle,0,&thisBuf);
if ( err != ZE_RESULT_SUCCESS ) {
std::cout << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl;
std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
std::cerr << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl;
std::cerr << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
exit(EXIT_FAILURE);
} else {
std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle succeeded for rank "<<r<<std::endl;
@@ -652,6 +732,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#else
WorldShmCommBufs[r] = ShmCommBuf;
#endif
MPI_Barrier(WorldShmComm);
}
_ShmAllocBytes=bytes;
@@ -663,7 +744,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#ifdef GRID_MPI3_SHMMMAP
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << header "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -700,7 +781,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
assert(((uint64_t)ptr&0x3F)==0);
close(fd);
WorldShmCommBufs[r] =ptr;
// std::cout << header "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
// std::cout << Mheader "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
}
_ShmAlloc=1;
_ShmAllocBytes = bytes;
@@ -710,7 +791,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#ifdef GRID_MPI3_SHM_NONE
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << header "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -757,7 +838,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
////////////////////////////////////////////////////////////////////////////////////////////
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << header "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
MPI_Barrier(WorldShmComm);

View File

@@ -68,7 +68,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Each MPI rank should allocate our own buffer
///////////////////////////////////////////////////////////////////////////////////////////////////////////
ShmCommBuf = acceleratorAllocDevice(bytes);
ShmCommBuf = acceleratorAllocShared(bytes);
//ShmCommBuf = acceleratorAllocDevice(bytes);
if (ShmCommBuf == (void *)NULL ) {
std::cerr << " SharedMemoryNone.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;

View File

@@ -29,8 +29,27 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid);
extern Vector<std::pair<int,int> > Cshift_table;
extern std::vector<std::pair<int,int> > Cshift_table;
extern commVector<std::pair<int,int> > Cshift_table_device;
inline std::pair<int,int> *MapCshiftTable(void)
{
// GPU version
#ifdef ACCELERATOR_CSHIFT
uint64_t sz=Cshift_table.size();
if (Cshift_table_device.size()!=sz ) {
Cshift_table_device.resize(sz);
}
acceleratorCopyToDevice((void *)&Cshift_table[0],
(void *)&Cshift_table_device[0],
sizeof(Cshift_table[0])*sz);
return &Cshift_table_device[0];
#else
return &Cshift_table[0];
#endif
// CPU version use identify map
}
///////////////////////////////////////////////////////////////////
// Gather for when there is no need to SIMD split
///////////////////////////////////////////////////////////////////
@@ -74,8 +93,8 @@ Gather_plane_simple (const Lattice<vobj> &rhs,cshiftVector<vobj> &buffer,int dim
}
{
auto buffer_p = & buffer[0];
auto table = &Cshift_table[0];
#ifdef ACCELERATOR_CSHIFT
auto table = MapCshiftTable();
#ifdef ACCELERATOR_CSHIFT
autoView(rhs_v , rhs, AcceleratorRead);
accelerator_for(i,ent,vobj::Nsimd(),{
coalescedWrite(buffer_p[table[i].first],coalescedRead(rhs_v[table[i].second]));
@@ -225,7 +244,7 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,cshiftVector<
{
auto buffer_p = & buffer[0];
auto table = &Cshift_table[0];
auto table = MapCshiftTable();
#ifdef ACCELERATOR_CSHIFT
autoView( rhs_v, rhs, AcceleratorWrite);
accelerator_for(i,ent,vobj::Nsimd(),{
@@ -297,30 +316,6 @@ template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,ExtractPointerA
}
}
#if (defined(GRID_CUDA) || defined(GRID_HIP)) && defined(ACCELERATOR_CSHIFT)
template <typename T>
T iDivUp(T a, T b) // Round a / b to nearest higher integer value
{ return (a % b != 0) ? (a / b + 1) : (a / b); }
template <typename T>
__global__ void populate_Cshift_table(T* vector, T lo, T ro, T e1, T e2, T stride)
{
int idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx >= e1*e2) return;
int n, b, o;
n = idx / e2;
b = idx % e2;
o = n*stride + b;
vector[2*idx + 0] = lo + o;
vector[2*idx + 1] = ro + o;
}
#endif
//////////////////////////////////////////////////////
// local to node block strided copies
//////////////////////////////////////////////////////
@@ -345,20 +340,12 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
int ent=0;
if(cbmask == 0x3 ){
#if (defined(GRID_CUDA) || defined(GRID_HIP)) && defined(ACCELERATOR_CSHIFT)
ent = e1*e2;
dim3 blockSize(acceleratorThreads());
dim3 gridSize(iDivUp((unsigned int)ent, blockSize.x));
populate_Cshift_table<<<gridSize, blockSize>>>(&Cshift_table[0].first, lo, ro, e1, e2, stride);
accelerator_barrier();
#else
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride+b;
Cshift_table[ent++] = std::pair<int,int>(lo+o,ro+o);
}
}
#endif
} else {
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
@@ -372,7 +359,7 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
}
{
auto table = &Cshift_table[0];
auto table = MapCshiftTable();
#ifdef ACCELERATOR_CSHIFT
autoView(rhs_v , rhs, AcceleratorRead);
autoView(lhs_v , lhs, AcceleratorWrite);
@@ -409,19 +396,11 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
int ent=0;
if ( cbmask == 0x3 ) {
#if (defined(GRID_CUDA) || defined(GRID_HIP)) && defined(ACCELERATOR_CSHIFT)
ent = e1*e2;
dim3 blockSize(acceleratorThreads());
dim3 gridSize(iDivUp((unsigned int)ent, blockSize.x));
populate_Cshift_table<<<gridSize, blockSize>>>(&Cshift_table[0].first, lo, ro, e1, e2, stride);
accelerator_barrier();
#else
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride;
Cshift_table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
}}
#endif
} else {
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
@@ -432,7 +411,7 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
}
{
auto table = &Cshift_table[0];
auto table = MapCshiftTable();
#ifdef ACCELERATOR_CSHIFT
autoView( rhs_v, rhs, AcceleratorRead);
autoView( lhs_v, lhs, AcceleratorWrite);

View File

@@ -52,7 +52,8 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
int comm_dim = rhs.Grid()->_processors[dimension] >1 ;
int splice_dim = rhs.Grid()->_simd_layout[dimension]>1 && (comm_dim);
RealD t1,t0;
t0=usecond();
if ( !comm_dim ) {
//std::cout << "CSHIFT: Cshift_local" <<std::endl;
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
@@ -63,6 +64,8 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
//std::cout << "CSHIFT: Cshift_comms" <<std::endl;
Cshift_comms(ret,rhs,dimension,shift);
}
t1=usecond();
// std::cout << GridLogPerformance << "Cshift took "<< (t1-t0)/1e3 << " ms"<<std::endl;
return ret;
}
@@ -127,16 +130,20 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
int cb= (cbmask==0x2)? Odd : Even;
int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
RealD tcopy=0.0;
RealD tgather=0.0;
RealD tscatter=0.0;
RealD tcomms=0.0;
uint64_t xbytes=0;
for(int x=0;x<rd;x++){
int sx = (x+sshift)%rd;
int comm_proc = ((x+sshift)/rd)%pd;
if (comm_proc==0) {
tcopy-=usecond();
Copy_plane(ret,rhs,dimension,x,sx,cbmask);
tcopy+=usecond();
} else {
int words = buffer_size;
@@ -144,26 +151,39 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
int bytes = words * sizeof(vobj);
tgather-=usecond();
Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask);
tgather+=usecond();
// int rank = grid->_processor;
int recv_from_rank;
int xmit_to_rank;
grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
grid->Barrier();
tcomms-=usecond();
// grid->Barrier();
grid->SendToRecvFrom((void *)&send_buf[0],
xmit_to_rank,
(void *)&recv_buf[0],
recv_from_rank,
bytes);
xbytes+=bytes;
// grid->Barrier();
tcomms+=usecond();
grid->Barrier();
tscatter-=usecond();
Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask);
tscatter+=usecond();
}
}
/*
std::cout << GridLogPerformance << " Cshift copy "<<tcopy/1e3<<" ms"<<std::endl;
std::cout << GridLogPerformance << " Cshift gather "<<tgather/1e3<<" ms"<<std::endl;
std::cout << GridLogPerformance << " Cshift scatter "<<tscatter/1e3<<" ms"<<std::endl;
std::cout << GridLogPerformance << " Cshift comm "<<tcomms/1e3<<" ms"<<std::endl;
std::cout << GridLogPerformance << " Cshift BW "<<(2.0*xbytes)/tcomms<<" MB/s "<<2*xbytes<< " Bytes "<<std::endl;
*/
}
template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
@@ -190,6 +210,12 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
assert(shift>=0);
assert(shift<fd);
RealD tcopy=0.0;
RealD tgather=0.0;
RealD tscatter=0.0;
RealD tcomms=0.0;
uint64_t xbytes=0;
int permute_type=grid->PermuteType(dimension);
///////////////////////////////////////////////
@@ -227,7 +253,9 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
pointers[i] = &send_buf_extract[i][0];
}
int sx = (x+sshift)%rd;
tgather-=usecond();
Gather_plane_extract(rhs,pointers,dimension,sx,cbmask);
tgather+=usecond();
for(int i=0;i<Nsimd;i++){
@@ -252,7 +280,8 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
if(nbr_proc){
grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
grid->Barrier();
tcomms-=usecond();
// grid->Barrier();
send_buf_extract_mpi = &send_buf_extract[nbr_lane][0];
recv_buf_extract_mpi = &recv_buf_extract[i][0];
@@ -262,7 +291,9 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
recv_from_rank,
bytes);
grid->Barrier();
xbytes+=bytes;
// grid->Barrier();
tcomms+=usecond();
rpointers[i] = &recv_buf_extract[i][0];
} else {
@@ -270,9 +301,17 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
}
}
tscatter-=usecond();
Scatter_plane_merge(ret,rpointers,dimension,x,cbmask);
tscatter+=usecond();
}
/*
std::cout << GridLogPerformance << " Cshift (s) copy "<<tcopy/1e3<<" ms"<<std::endl;
std::cout << GridLogPerformance << " Cshift (s) gather "<<tgather/1e3<<" ms"<<std::endl;
std::cout << GridLogPerformance << " Cshift (s) scatter "<<tscatter/1e3<<" ms"<<std::endl;
std::cout << GridLogPerformance << " Cshift (s) comm "<<tcomms/1e3<<" ms"<<std::endl;
std::cout << GridLogPerformance << " Cshift BW "<<(2.0*xbytes)/tcomms<<" MB/s "<<2*xbytes<< " Bytes "<<std::endl;
*/
}
#else
template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
@@ -292,6 +331,11 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
assert(comm_dim==1);
assert(shift>=0);
assert(shift<fd);
RealD tcopy=0.0;
RealD tgather=0.0;
RealD tscatter=0.0;
RealD tcomms=0.0;
uint64_t xbytes=0;
int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
static cshiftVector<vobj> send_buf_v; send_buf_v.resize(buffer_size);
@@ -315,7 +359,9 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
if (comm_proc==0) {
tcopy-=usecond();
Copy_plane(ret,rhs,dimension,x,sx,cbmask);
tcopy+=usecond();
} else {
@@ -324,7 +370,9 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
int bytes = words * sizeof(vobj);
tgather-=usecond();
Gather_plane_simple (rhs,send_buf_v,dimension,sx,cbmask);
tgather+=usecond();
// int rank = grid->_processor;
int recv_from_rank;
@@ -332,7 +380,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
grid->Barrier();
tcomms-=usecond();
// grid->Barrier();
acceleratorCopyDeviceToDevice((void *)&send_buf_v[0],(void *)&send_buf[0],bytes);
grid->SendToRecvFrom((void *)&send_buf[0],
@@ -340,13 +389,24 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
(void *)&recv_buf[0],
recv_from_rank,
bytes);
xbytes+=bytes;
acceleratorCopyDeviceToDevice((void *)&recv_buf[0],(void *)&recv_buf_v[0],bytes);
grid->Barrier();
// grid->Barrier();
tcomms+=usecond();
tscatter-=usecond();
Scatter_plane_simple (ret,recv_buf_v,dimension,x,cbmask);
tscatter+=usecond();
}
}
/*
std::cout << GridLogPerformance << " Cshift copy "<<tcopy/1e3<<" ms"<<std::endl;
std::cout << GridLogPerformance << " Cshift gather "<<tgather/1e3<<" ms"<<std::endl;
std::cout << GridLogPerformance << " Cshift scatter "<<tscatter/1e3<<" ms"<<std::endl;
std::cout << GridLogPerformance << " Cshift comm "<<tcomms/1e3<<" ms"<<std::endl;
std::cout << GridLogPerformance << " Cshift BW "<<(2.0*xbytes)/tcomms<<" MB/s "<<2*xbytes<< " Bytes "<<std::endl;
*/
}
template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
@@ -372,6 +432,11 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
assert(simd_layout==2);
assert(shift>=0);
assert(shift<fd);
RealD tcopy=0.0;
RealD tgather=0.0;
RealD tscatter=0.0;
RealD tcomms=0.0;
uint64_t xbytes=0;
int permute_type=grid->PermuteType(dimension);
@@ -414,8 +479,10 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
for(int i=0;i<Nsimd;i++){
pointers[i] = &send_buf_extract[i][0];
}
tgather-=usecond();
int sx = (x+sshift)%rd;
Gather_plane_extract(rhs,pointers,dimension,sx,cbmask);
tgather+=usecond();
for(int i=0;i<Nsimd;i++){
@@ -440,7 +507,8 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
if(nbr_proc){
grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
grid->Barrier();
tcomms-=usecond();
// grid->Barrier();
acceleratorCopyDeviceToDevice((void *)&send_buf_extract[nbr_lane][0],(void *)send_buf_extract_mpi,bytes);
grid->SendToRecvFrom((void *)send_buf_extract_mpi,
@@ -449,17 +517,28 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
recv_from_rank,
bytes);
acceleratorCopyDeviceToDevice((void *)recv_buf_extract_mpi,(void *)&recv_buf_extract[i][0],bytes);
xbytes+=bytes;
grid->Barrier();
// grid->Barrier();
tcomms+=usecond();
rpointers[i] = &recv_buf_extract[i][0];
} else {
rpointers[i] = &send_buf_extract[nbr_lane][0];
}
}
tscatter-=usecond();
Scatter_plane_merge(ret,rpointers,dimension,x,cbmask);
}
tscatter+=usecond();
}
/*
std::cout << GridLogPerformance << " Cshift (s) copy "<<tcopy/1e3<<" ms"<<std::endl;
std::cout << GridLogPerformance << " Cshift (s) gather "<<tgather/1e3<<" ms"<<std::endl;
std::cout << GridLogPerformance << " Cshift (s) scatter "<<tscatter/1e3<<" ms"<<std::endl;
std::cout << GridLogPerformance << " Cshift (s) comm "<<tcomms/1e3<<" ms"<<std::endl;
std::cout << GridLogPerformance << " Cshift BW "<<(2.0*xbytes)/tcomms<<" MB/s"<<std::endl;
*/
}
#endif
NAMESPACE_END(Grid);

View File

@@ -1,4 +1,5 @@
#include <Grid/GridCore.h>
NAMESPACE_BEGIN(Grid);
Vector<std::pair<int,int> > Cshift_table;
std::vector<std::pair<int,int> > Cshift_table;
commVector<std::pair<int,int> > Cshift_table_device;
NAMESPACE_END(Grid);

View File

@@ -47,3 +47,4 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/lattice/Lattice_transfer.h>
#include <Grid/lattice/Lattice_basis.h>
#include <Grid/lattice/Lattice_crc.h>
#include <Grid/lattice/PaddedCell.h>

View File

@@ -345,7 +345,9 @@ GridUnopClass(UnaryNot, Not(a));
GridUnopClass(UnaryTrace, trace(a));
GridUnopClass(UnaryTranspose, transpose(a));
GridUnopClass(UnaryTa, Ta(a));
GridUnopClass(UnarySpTa, SpTa(a));
GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a));
GridUnopClass(UnaryProjectOnSpGroup, ProjectOnSpGroup(a));
GridUnopClass(UnaryTimesI, timesI(a));
GridUnopClass(UnaryTimesMinusI, timesMinusI(a));
GridUnopClass(UnaryAbs, abs(a));
@@ -456,7 +458,9 @@ GRID_DEF_UNOP(operator!, UnaryNot);
GRID_DEF_UNOP(trace, UnaryTrace);
GRID_DEF_UNOP(transpose, UnaryTranspose);
GRID_DEF_UNOP(Ta, UnaryTa);
GRID_DEF_UNOP(SpTa, UnarySpTa);
GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup);
GRID_DEF_UNOP(ProjectOnSpGroup, UnaryProjectOnSpGroup);
GRID_DEF_UNOP(timesI, UnaryTimesI);
GRID_DEF_UNOP(timesMinusI, UnaryTimesMinusI);
GRID_DEF_UNOP(abs, UnaryAbs); // abs overloaded in cmath C++98; DON'T do the

View File

@@ -270,5 +270,42 @@ RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const L
return axpby_norm_fast(ret,a,b,x,y);
}
/// Trace product
template<class obj> auto traceProduct(const Lattice<obj> &rhs_1,const Lattice<obj> &rhs_2)
-> Lattice<decltype(trace(obj()))>
{
typedef decltype(trace(obj())) robj;
Lattice<robj> ret_i(rhs_1.Grid());
autoView( rhs1 , rhs_1, AcceleratorRead);
autoView( rhs2 , rhs_2, AcceleratorRead);
autoView( ret , ret_i, AcceleratorWrite);
ret.Checkerboard() = rhs_1.Checkerboard();
accelerator_for(ss,rhs1.size(),obj::Nsimd(),{
coalescedWrite(ret[ss],traceProduct(rhs1(ss),rhs2(ss)));
});
return ret_i;
}
template<class obj1,class obj2> auto traceProduct(const Lattice<obj1> &rhs_1,const obj2 &rhs2)
-> Lattice<decltype(trace(obj1()))>
{
typedef decltype(trace(obj1())) robj;
Lattice<robj> ret_i(rhs_1.Grid());
autoView( rhs1 , rhs_1, AcceleratorRead);
autoView( ret , ret_i, AcceleratorWrite);
ret.Checkerboard() = rhs_1.Checkerboard();
accelerator_for(ss,rhs1.size(),obj1::Nsimd(),{
coalescedWrite(ret[ss],traceProduct(rhs1(ss),rhs2));
});
return ret_i;
}
template<class obj1,class obj2> auto traceProduct(const obj2 &rhs_2,const Lattice<obj1> &rhs_1)
-> Lattice<decltype(trace(obj1()))>
{
return traceProduct(rhs_1,rhs_2);
}
NAMESPACE_END(Grid);
#endif

View File

@@ -251,10 +251,10 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
autoView( right_v,right, AcceleratorRead);
// This code could read coalesce
// GPU - SIMT lane compliance...
accelerator_for( ss, sites, nsimd,{
auto x_l = left_v(ss);
auto y_l = right_v(ss);
coalescedWrite(inner_tmp_v[ss],innerProductD(x_l,y_l));
accelerator_for( ss, sites, 1,{
auto x_l = left_v[ss];
auto y_l = right_v[ss];
inner_tmp_v[ss]=innerProductD(x_l,y_l);
});
}
#else
@@ -267,11 +267,18 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
autoView( right_v,right, AcceleratorRead);
// GPU - SIMT lane compliance...
accelerator_for( ss, sites, nsimd,{
auto x_l = left_v(ss);
auto y_l = right_v(ss);
coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l));
});
//accelerator_for( ss, sites, nsimd,{
// auto x_l = left_v(ss);
// auto y_l = right_v(ss);
// coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l));
//});
#pragma omp target map ( to:left_v, right_v ) map ( tofrom:inner_tmp_v )
#pragma omp teams distribute parallel for thread_limit(THREAD_LIMIT) //nowait
for ( uint64_t ss=0;ss<sites;ss++) {
auto x_l = left_v[ss];
auto y_l = right_v[ss];
coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l));
}
}
#endif
// This is in single precision and fails some tests

View File

@@ -30,9 +30,12 @@ int getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator &
cudaGetDevice(&device);
#endif
#ifdef GRID_HIP
hipGetDevice(&device);
auto r=hipGetDevice(&device);
#endif
#ifdef GRID_OMPTARGET
device = omp_get_device_num();
#endif
Iterator warpSize = gpu_props[device].warpSize;
Iterator sharedMemPerBlock = gpu_props[device].sharedMemPerBlock;
Iterator maxThreadsPerBlock = gpu_props[device].maxThreadsPerBlock;

View File

@@ -152,6 +152,7 @@ public:
#ifdef RNG_FAST_DISCARD
static void Skip(RngEngine &eng,uint64_t site)
{
#if 0
/////////////////////////////////////////////////////////////////////////////////////
// Skip by 2^40 elements between successive lattice sites
// This goes by 10^12.
@@ -162,9 +163,9 @@ public:
// tens of seconds per trajectory so this is clean in all reasonable cases,
// and margin of safety is orders of magnitude.
// We could hack Sitmo to skip in the higher order words of state if necessary
//
// Replace with 2^30 ; avoid problem on large volumes
//
//
// Replace with 2^30 ; avoid problem on large volumes
//
/////////////////////////////////////////////////////////////////////////////////////
// uint64_t skip = site+1; // Old init Skipped then drew. Checked compat with faster init
const int shift = 30;
@@ -179,6 +180,9 @@ public:
assert((skip >> shift)==site); // check for overflow
eng.discard(skip);
#else
eng.discardhi(site);
#endif
// std::cout << " Engine " <<site << " state " <<eng<<std::endl;
}
#endif

View File

@@ -66,6 +66,65 @@ inline auto TraceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<
return ret;
};
template<int N, class Vec>
Lattice<iScalar<iScalar<iScalar<Vec> > > > Determinant(const Lattice<iScalar<iScalar<iMatrix<Vec, N> > > > &Umu)
{
GridBase *grid=Umu.Grid();
auto lvol = grid->lSites();
Lattice<iScalar<iScalar<iScalar<Vec> > > > ret(grid);
typedef typename Vec::scalar_type scalar;
autoView(Umu_v,Umu,CpuRead);
autoView(ret_v,ret,CpuWrite);
thread_for(site,lvol,{
Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N);
Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor);
iScalar<iScalar<iMatrix<scalar, N> > > Us;
peekLocalSite(Us, Umu_v, lcoor);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
scalar tmp= Us()()(i,j);
ComplexD ztmp(real(tmp),imag(tmp));
EigenU(i,j)=ztmp;
}}
ComplexD detD = EigenU.determinant();
typename Vec::scalar_type det(detD.real(),detD.imag());
pokeLocalSite(det,ret_v,lcoor);
});
return ret;
}
template<int N>
Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > Inverse(const Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu)
{
GridBase *grid=Umu.Grid();
auto lvol = grid->lSites();
Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > ret(grid);
autoView(Umu_v,Umu,CpuRead);
autoView(ret_v,ret,CpuWrite);
thread_for(site,lvol,{
Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N);
Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor);
iScalar<iScalar<iMatrix<ComplexD, N> > > Us;
iScalar<iScalar<iMatrix<ComplexD, N> > > Ui;
peekLocalSite(Us, Umu_v, lcoor);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
EigenU(i,j) = Us()()(i,j);
}}
Eigen::MatrixXcd EigenUinv = EigenU.inverse();
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
Ui()()(i,j) = EigenUinv(i,j);
}}
pokeLocalSite(Ui,ret_v,lcoor);
});
return ret;
}
NAMESPACE_END(Grid);
#endif

View File

@@ -697,8 +697,68 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
for(int d=0;d<nd;d++){
assert(Fg->_processors[d] == Tg->_processors[d]);
}
// the above should guarantee that the operations are local
#if 1
size_t nsite = 1;
for(int i=0;i<nd;i++) nsite *= RegionSize[i];
size_t tbytes = 4*nsite*sizeof(int);
int *table = (int*)malloc(tbytes);
thread_for(idx, nsite, {
Coordinate from_coor, to_coor;
size_t rem = idx;
for(int i=0;i<nd;i++){
size_t base_i = rem % RegionSize[i]; rem /= RegionSize[i];
from_coor[i] = base_i + FromLowerLeft[i];
to_coor[i] = base_i + ToLowerLeft[i];
}
int foidx = Fg->oIndex(from_coor);
int fiidx = Fg->iIndex(from_coor);
int toidx = Tg->oIndex(to_coor);
int tiidx = Tg->iIndex(to_coor);
int* tt = table + 4*idx;
tt[0] = foidx;
tt[1] = fiidx;
tt[2] = toidx;
tt[3] = tiidx;
});
int* table_d = (int*)acceleratorAllocDevice(tbytes);
acceleratorCopyToDevice(table,table_d,tbytes);
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
autoView(from_v,From,AcceleratorRead);
autoView(to_v,To,AcceleratorWrite);
accelerator_for(idx,nsite,1,{
static const int words=sizeof(vobj)/sizeof(vector_type);
int* tt = table_d + 4*idx;
int from_oidx = *tt++;
int from_lane = *tt++;
int to_oidx = *tt++;
int to_lane = *tt;
const vector_type* from = (const vector_type *)&from_v[from_oidx];
vector_type* to = (vector_type *)&to_v[to_oidx];
scalar_type stmp;
for(int w=0;w<words;w++){
stmp = getlane(from[w], from_lane);
putlane(to[w], stmp, to_lane);
}
});
acceleratorFreeDevice(table_d);
free(table);
#else
Coordinate ldf = Fg->_ldimensions;
Coordinate rdf = Fg->_rdimensions;
Coordinate isf = Fg->_istride;
@@ -707,9 +767,9 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
Coordinate ist = Tg->_istride;
Coordinate ost = Tg->_ostride;
autoView( t_v , To, AcceleratorWrite);
autoView( f_v , From, AcceleratorRead);
accelerator_for(idx,Fg->lSites(),1,{
autoView( t_v , To, CpuWrite);
autoView( f_v , From, CpuRead);
thread_for(idx,Fg->lSites(),{
sobj s;
Coordinate Fcoor(nd);
Coordinate Tcoor(nd);
@@ -722,17 +782,24 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
Tcoor[d] = ToLowerLeft[d]+ Fcoor[d]-FromLowerLeft[d];
}
if (in_region) {
Integer idx_f = 0; for(int d=0;d<nd;d++) idx_f+=isf[d]*(Fcoor[d]/rdf[d]);
Integer idx_t = 0; for(int d=0;d<nd;d++) idx_t+=ist[d]*(Tcoor[d]/rdt[d]);
Integer odx_f = 0; for(int d=0;d<nd;d++) odx_f+=osf[d]*(Fcoor[d]%rdf[d]);
Integer odx_t = 0; for(int d=0;d<nd;d++) odx_t+=ost[d]*(Tcoor[d]%rdt[d]);
vector_type * fp = (vector_type *)&f_v[odx_f];
vector_type * tp = (vector_type *)&t_v[odx_t];
#if 0
Integer idx_f = 0; for(int d=0;d<nd;d++) idx_f+=isf[d]*(Fcoor[d]/rdf[d]); // inner index from
Integer idx_t = 0; for(int d=0;d<nd;d++) idx_t+=ist[d]*(Tcoor[d]/rdt[d]); // inner index to
Integer odx_f = 0; for(int d=0;d<nd;d++) odx_f+=osf[d]*(Fcoor[d]%rdf[d]); // outer index from
Integer odx_t = 0; for(int d=0;d<nd;d++) odx_t+=ost[d]*(Tcoor[d]%rdt[d]); // outer index to
scalar_type * fp = (scalar_type *)&f_v[odx_f];
scalar_type * tp = (scalar_type *)&t_v[odx_t];
for(int w=0;w<words;w++){
tp[w].putlane(fp[w].getlane(idx_f),idx_t);
}
#else
peekLocalSite(s,f_v,Fcoor);
pokeLocalSite(s,t_v,Tcoor);
#endif
}
});
#endif
}
@@ -825,6 +892,8 @@ void ExtractSlice(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slic
}
//Insert subvolume orthogonal to direction 'orthog' with slice index 'slice_lo' from 'lowDim' onto slice index 'slice_hi' of higherDim
//The local dimensions of both 'lowDim' and 'higherDim' orthogonal to 'orthog' should be the same
template<class vobj>
void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{
@@ -841,11 +910,70 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
for(int d=0;d<nh;d++){
if ( d!=orthog ) {
assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
}
assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
}
}
#if 1
size_t nsite = lg->lSites()/lg->LocalDimensions()[orthog];
size_t tbytes = 4*nsite*sizeof(int);
int *table = (int*)malloc(tbytes);
thread_for(idx,nsite,{
Coordinate lcoor(nl);
Coordinate hcoor(nh);
lcoor[orthog] = slice_lo;
hcoor[orthog] = slice_hi;
size_t rem = idx;
for(int mu=0;mu<nl;mu++){
if(mu != orthog){
int xmu = rem % lg->LocalDimensions()[mu]; rem /= lg->LocalDimensions()[mu];
lcoor[mu] = hcoor[mu] = xmu;
}
}
int loidx = lg->oIndex(lcoor);
int liidx = lg->iIndex(lcoor);
int hoidx = hg->oIndex(hcoor);
int hiidx = hg->iIndex(hcoor);
int* tt = table + 4*idx;
tt[0] = loidx;
tt[1] = liidx;
tt[2] = hoidx;
tt[3] = hiidx;
});
int* table_d = (int*)acceleratorAllocDevice(tbytes);
acceleratorCopyToDevice(table,table_d,tbytes);
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
autoView(lowDim_v,lowDim,AcceleratorRead);
autoView(higherDim_v,higherDim,AcceleratorWrite);
accelerator_for(idx,nsite,1,{
static const int words=sizeof(vobj)/sizeof(vector_type);
int* tt = table_d + 4*idx;
int from_oidx = *tt++;
int from_lane = *tt++;
int to_oidx = *tt++;
int to_lane = *tt;
const vector_type* from = (const vector_type *)&lowDim_v[from_oidx];
vector_type* to = (vector_type *)&higherDim_v[to_oidx];
scalar_type stmp;
for(int w=0;w<words;w++){
stmp = getlane(from[w], from_lane);
putlane(to[w], stmp, to_lane);
}
});
acceleratorFreeDevice(table_d);
free(table);
#else
// the above should guarantee that the operations are local
autoView(lowDimv,lowDim,CpuRead);
autoView(higherDimv,higherDim,CpuWrite);
@@ -861,6 +989,7 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
pokeLocalSite(s,higherDimv,hcoor);
}
});
#endif
}

View File

@@ -79,7 +79,7 @@ public:
accelerator_inline uint64_t end(void) const { return this->_odata_size; };
accelerator_inline uint64_t size(void) const { return this->_odata_size; };
LatticeView(const LatticeAccelerator<vobj> &refer_to_me) : LatticeAccelerator<vobj> (refer_to_me){}
LatticeView(const LatticeAccelerator<vobj> &refer_to_me) : LatticeAccelerator<vobj> (refer_to_me){ }
LatticeView(const LatticeView<vobj> &refer_to_me) = default; // Trivially copyable
LatticeView(const LatticeAccelerator<vobj> &refer_to_me,ViewMode mode) : LatticeAccelerator<vobj> (refer_to_me)
{

174
Grid/lattice/PaddedCell.h Normal file
View File

@@ -0,0 +1,174 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/lattice/PaddedCell.h
Copyright (C) 2019
Author: Peter Boyle pboyle@bnl.gov
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include<Grid/cshift/Cshift.h>
NAMESPACE_BEGIN(Grid);
//Allow the user to specify how the C-shift is performed, e.g. to respect the appropriate boundary conditions
template<typename vobj>
struct CshiftImplBase{
virtual Lattice<vobj> Cshift(const Lattice<vobj> &in, int dir, int shift) const = 0;
virtual ~CshiftImplBase(){}
};
template<typename vobj>
struct CshiftImplDefault: public CshiftImplBase<vobj>{
Lattice<vobj> Cshift(const Lattice<vobj> &in, int dir, int shift) const override{ return Grid::Cshift(in,dir,shift); }
};
template<typename Gimpl>
struct CshiftImplGauge: public CshiftImplBase<typename Gimpl::GaugeLinkField::vector_object>{
typename Gimpl::GaugeLinkField Cshift(const typename Gimpl::GaugeLinkField &in, int dir, int shift) const override{ return Gimpl::CshiftLink(in,dir,shift); }
};
class PaddedCell {
public:
GridCartesian * unpadded_grid;
int dims;
int depth;
std::vector<GridCartesian *> grids;
~PaddedCell()
{
DeleteGrids();
}
PaddedCell(int _depth,GridCartesian *_grid)
{
unpadded_grid = _grid;
depth=_depth;
dims=_grid->Nd();
AllocateGrids();
Coordinate local =unpadded_grid->LocalDimensions();
for(int d=0;d<dims;d++){
assert(local[d]>=depth);
}
}
void DeleteGrids(void)
{
for(int d=0;d<grids.size();d++){
delete grids[d];
}
grids.resize(0);
};
void AllocateGrids(void)
{
Coordinate local =unpadded_grid->LocalDimensions();
Coordinate simd =unpadded_grid->_simd_layout;
Coordinate processors=unpadded_grid->_processors;
Coordinate plocal =unpadded_grid->LocalDimensions();
Coordinate global(dims);
// expand up one dim at a time
for(int d=0;d<dims;d++){
plocal[d] += 2*depth;
for(int d=0;d<dims;d++){
global[d] = plocal[d]*processors[d];
}
grids.push_back(new GridCartesian(global,simd,processors));
}
};
template<class vobj>
inline Lattice<vobj> Extract(const Lattice<vobj> &in) const
{
Lattice<vobj> out(unpadded_grid);
Coordinate local =unpadded_grid->LocalDimensions();
Coordinate fll(dims,depth); // depends on the MPI spread
Coordinate tll(dims,0); // depends on the MPI spread
localCopyRegion(in,out,fll,tll,local);
return out;
}
template<class vobj>
inline Lattice<vobj> Exchange(const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
{
GridBase *old_grid = in.Grid();
int dims = old_grid->Nd();
Lattice<vobj> tmp = in;
for(int d=0;d<dims;d++){
tmp = Expand(d,tmp,cshift); // rvalue && assignment
}
return tmp;
}
// expand up one dim at a time
template<class vobj>
inline Lattice<vobj> Expand(int dim, const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
{
GridBase *old_grid = in.Grid();
GridCartesian *new_grid = grids[dim];//These are new grids
Lattice<vobj> padded(new_grid);
Lattice<vobj> shifted(old_grid);
Coordinate local =old_grid->LocalDimensions();
Coordinate plocal =new_grid->LocalDimensions();
if(dim==0) conformable(old_grid,unpadded_grid);
else conformable(old_grid,grids[dim-1]);
std::cout << " dim "<<dim<<" local "<<local << " padding to "<<plocal<<std::endl;
double tins=0, tshift=0;
// Middle bit
double t = usecond();
for(int x=0;x<local[dim];x++){
InsertSliceLocal(in,padded,x,depth+x,dim);
}
tins += usecond() - t;
// High bit
t = usecond();
shifted = cshift.Cshift(in,dim,depth);
tshift += usecond() - t;
t=usecond();
for(int x=0;x<depth;x++){
InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim);
}
tins += usecond() - t;
// Low bit
t = usecond();
shifted = cshift.Cshift(in,dim,-depth);
tshift += usecond() - t;
t = usecond();
for(int x=0;x<depth;x++){
InsertSliceLocal(shifted,padded,x,x,dim);
}
tins += usecond() - t;
std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl;
return padded;
}
};
NAMESPACE_END(Grid);

View File

@@ -104,6 +104,7 @@ template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iSca
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
template<typename vtype> using iLorentzComplex = iVector<iScalar<iScalar<vtype> >, Nd > ;
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
@@ -178,6 +179,15 @@ typedef iLorentzColourMatrix<vComplexF> vLorentzColourMatrixF;
typedef iLorentzColourMatrix<vComplexD> vLorentzColourMatrixD;
typedef iLorentzColourMatrix<vComplexD2> vLorentzColourMatrixD2;
// LorentzComplex
typedef iLorentzComplex<Complex > LorentzComplex;
typedef iLorentzComplex<ComplexF > LorentzComplexF;
typedef iLorentzComplex<ComplexD > LorentzComplexD;
typedef iLorentzComplex<vComplex > vLorentzComplex;
typedef iLorentzComplex<vComplexF> vLorentzComplexF;
typedef iLorentzComplex<vComplexD> vLorentzComplexD;
// DoubleStored gauge field
typedef iDoubleStoredColourMatrix<Complex > DoubleStoredColourMatrix;
typedef iDoubleStoredColourMatrix<ComplexF > DoubleStoredColourMatrixF;
@@ -307,6 +317,10 @@ typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;
typedef Lattice<vLorentzColourMatrixD> LatticeLorentzColourMatrixD;
typedef Lattice<vLorentzColourMatrixD2> LatticeLorentzColourMatrixD2;
typedef Lattice<vLorentzComplex> LatticeLorentzComplex;
typedef Lattice<vLorentzComplexF> LatticeLorentzComplexF;
typedef Lattice<vLorentzComplexD> LatticeLorentzComplexD;
// DoubleStored gauge field
typedef Lattice<vDoubleStoredColourMatrix> LatticeDoubleStoredColourMatrix;
typedef Lattice<vDoubleStoredColourMatrixF> LatticeDoubleStoredColourMatrixF;

View File

@@ -34,10 +34,24 @@ directory
NAMESPACE_BEGIN(Grid);
///////////////////////////////////
// Smart configuration base class
///////////////////////////////////
template< class Field >
class ConfigurationBase
{
public:
ConfigurationBase() {}
virtual ~ConfigurationBase() {}
virtual void set_Field(Field& U) =0;
virtual void smeared_force(Field&) = 0;
virtual Field& get_SmearedU() =0;
virtual Field &get_U(bool smeared = false) = 0;
};
template <class GaugeField >
class Action
{
public:
bool is_smeared = false;
RealD deriv_norm_sum;
@@ -77,16 +91,60 @@ public:
void refresh_timer_stop(void) { refresh_us+=usecond(); }
void S_timer_start(void) { S_us-=usecond(); }
void S_timer_stop(void) { S_us+=usecond(); }
/////////////////////////////
// Heatbath?
/////////////////////////////
virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) = 0; // refresh pseudofermions
virtual RealD S(const GaugeField& U) = 0; // evaluate the action
virtual RealD Sinitial(const GaugeField& U) { return this->S(U); } ; // if the refresh computes the action, can cache it. Alternately refreshAndAction() ?
virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0; // evaluate the action derivative
/////////////////////////////////////////////////////////////
// virtual smeared interface through configuration container
/////////////////////////////////////////////////////////////
virtual void refresh(ConfigurationBase<GaugeField> & U, GridSerialRNG &sRNG, GridParallelRNG& pRNG)
{
refresh(U.get_U(is_smeared),sRNG,pRNG);
}
virtual RealD S(ConfigurationBase<GaugeField>& U)
{
return S(U.get_U(is_smeared));
}
virtual RealD Sinitial(ConfigurationBase<GaugeField>& U)
{
return Sinitial(U.get_U(is_smeared));
}
virtual void deriv(ConfigurationBase<GaugeField>& U, GaugeField& dSdU)
{
deriv(U.get_U(is_smeared),dSdU);
if ( is_smeared ) {
U.smeared_force(dSdU);
}
}
///////////////////////////////
// Logging
///////////////////////////////
virtual std::string action_name() = 0; // return the action name
virtual std::string LogParameters() = 0; // prints action parameters
virtual ~Action(){}
};
template <class GaugeField >
class EmptyAction : public Action <GaugeField>
{
virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) { assert(0);}; // refresh pseudofermions
virtual RealD S(const GaugeField& U) { return 0.0;}; // evaluate the action
virtual void deriv(const GaugeField& U, GaugeField& dSdU) { assert(0); }; // evaluate the action derivative
///////////////////////////////
// Logging
///////////////////////////////
virtual std::string action_name() { return std::string("Level Force Log"); };
virtual std::string LogParameters() { return std::string("No parameters");};
};
NAMESPACE_END(Grid);
#endif // ACTION_BASE_H

View File

@@ -30,6 +30,8 @@ directory
#ifndef QCD_ACTION_CORE
#define QCD_ACTION_CORE
#include <Grid/qcd/action/gauge/GaugeImplementations.h>
#include <Grid/qcd/action/ActionBase.h>
NAMESPACE_CHECK(ActionBase);
#include <Grid/qcd/action/ActionSet.h>

View File

@@ -126,6 +126,16 @@ typedef WilsonFermion<WilsonTwoIndexSymmetricImplD> WilsonTwoIndexSymmetricFermi
typedef WilsonFermion<WilsonTwoIndexAntiSymmetricImplF> WilsonTwoIndexAntiSymmetricFermionF;
typedef WilsonFermion<WilsonTwoIndexAntiSymmetricImplD> WilsonTwoIndexAntiSymmetricFermionD;
// Sp(2n)
typedef WilsonFermion<SpWilsonImplF> SpWilsonFermionF;
typedef WilsonFermion<SpWilsonImplD> SpWilsonFermionD;
typedef WilsonFermion<SpWilsonTwoIndexAntiSymmetricImplF> SpWilsonTwoIndexAntiSymmetricFermionF;
typedef WilsonFermion<SpWilsonTwoIndexAntiSymmetricImplD> SpWilsonTwoIndexAntiSymmetricFermionD;
typedef WilsonFermion<SpWilsonTwoIndexSymmetricImplF> SpWilsonTwoIndexSymmetricFermionF;
typedef WilsonFermion<SpWilsonTwoIndexSymmetricImplD> SpWilsonTwoIndexSymmetricFermionD;
// Twisted mass fermion
typedef WilsonTMFermion<WilsonImplD2> WilsonTMFermionD2;
typedef WilsonTMFermion<WilsonImplF> WilsonTMFermionF;

View File

@@ -507,6 +507,7 @@ public:
}
this->face_table_computed=1;
assert(this->u_comm_offset==this->_unified_buffer_size);
accelerator_barrier();
}
};

View File

@@ -261,6 +261,22 @@ typedef WilsonImpl<vComplex, TwoIndexAntiSymmetricRepresentation, CoeffReal > W
typedef WilsonImpl<vComplexF, TwoIndexAntiSymmetricRepresentation, CoeffReal > WilsonTwoIndexAntiSymmetricImplF; // Float
typedef WilsonImpl<vComplexD, TwoIndexAntiSymmetricRepresentation, CoeffReal > WilsonTwoIndexAntiSymmetricImplD; // Double
//sp 2n
typedef WilsonImpl<vComplex, SpFundamentalRepresentation, CoeffReal > SpWilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, SpFundamentalRepresentation, CoeffReal > SpWilsonImplF; // Float
typedef WilsonImpl<vComplexD, SpFundamentalRepresentation, CoeffReal > SpWilsonImplD; // Double
typedef WilsonImpl<vComplex, SpTwoIndexAntiSymmetricRepresentation, CoeffReal > SpWilsonTwoIndexAntiSymmetricImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, SpTwoIndexAntiSymmetricRepresentation, CoeffReal > SpWilsonTwoIndexAntiSymmetricImplF; // Float
typedef WilsonImpl<vComplexD, SpTwoIndexAntiSymmetricRepresentation, CoeffReal > SpWilsonTwoIndexAntiSymmetricImplD; // Double
typedef WilsonImpl<vComplex, SpTwoIndexSymmetricRepresentation, CoeffReal > SpWilsonTwoIndexSymmetricImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, SpTwoIndexSymmetricRepresentation, CoeffReal > SpWilsonTwoIndexSymmetricImplF; // Float
typedef WilsonImpl<vComplexD, SpTwoIndexSymmetricRepresentation, CoeffReal > SpWilsonTwoIndexSymmetricImplD; // Double
typedef WilsonImpl<vComplex, SpTwoIndexSymmetricRepresentation, CoeffReal > SpWilsonAdjImplR; // Real.. whichever prec // adj = 2indx symmetric for Sp(2N)
typedef WilsonImpl<vComplexF, SpTwoIndexSymmetricRepresentation, CoeffReal > SpWilsonAdjImplF; // Float // adj = 2indx symmetric for Sp(2N)
typedef WilsonImpl<vComplexD, SpTwoIndexSymmetricRepresentation, CoeffReal > SpWilsonAdjImplD; // Double // adj = 2indx symmetric for Sp(2N)
NAMESPACE_END(Grid);

View File

@@ -332,8 +332,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
/////////////////////////////
{
GRID_TRACE("Gather");
st.HaloExchangeOptGather(in,compressor);
accelerator_barrier();
st.HaloExchangeOptGather(in,compressor); // Put the barrier in the routine
}
std::vector<std::vector<CommsRequest_t> > requests;

View File

@@ -423,14 +423,14 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
#define KERNEL_CALL_EXT(A) \
const uint64_t NN = Nsite*Ls; \
const uint64_t sz = st.surface_list.size(); \
auto ptr = &st.surface_list[0]; \
accelerator_forNB( ss, sz, Simd::Nsimd(), { \
int sF = ptr[ss]; \
int sU = ss/Ls; \
int sU = sF/Ls; \
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
});
}); \
accelerator_barrier();
#define ASM_CALL(A) \
thread_for( sss, Nsite, { \
@@ -474,9 +474,10 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); return;}
#endif
} else if( exterior ) {
// dependent on result of merge
acceleratorFenceComputeStream();
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;}
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL_EXT(GenericDhopSiteExt); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteExt); return;}
#ifndef GRID_CUDA
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); return;}
#endif
@@ -506,9 +507,10 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;}
#endif
} else if( exterior ) {
// Dependent on result of merge
acceleratorFenceComputeStream();
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagExt); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt); return;}
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL_EXT(GenericDhopSiteDagExt); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteDagExt); return;}
#ifndef GRID_CUDA
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagExt); return;}
#endif

View File

@@ -0,0 +1 @@
../WilsonCloverFermionInstantiation.cc.master

View File

@@ -0,0 +1 @@
../WilsonFermionInstantiation.cc.master

View File

@@ -0,0 +1 @@
../WilsonKernelsInstantiation.cc.master

View File

@@ -0,0 +1 @@
../WilsonTMFermionInstantiation.cc.master

View File

@@ -0,0 +1 @@
#define IMPLEMENTATION SpWilsonImplD

View File

@@ -0,0 +1 @@
../WilsonCloverFermionInstantiation.cc.master

View File

@@ -0,0 +1 @@
../WilsonFermionInstantiation.cc.master

View File

@@ -0,0 +1 @@
../WilsonKernelsInstantiation.cc.master

View File

@@ -0,0 +1 @@
../WilsonTMFermionInstantiation.cc.master

View File

@@ -0,0 +1 @@
#define IMPLEMENTATION SpWilsonImplF

View File

@@ -0,0 +1 @@
#define IMPLEMENTATION SpWilsonTwoIndexAntiSymmetricImplD

View File

@@ -0,0 +1 @@
#define IMPLEMENTATION SpWilsonTwoIndexAntiSymmetricImplF

View File

@@ -0,0 +1 @@
#define IMPLEMENTATION SpWilsonTwoIndexSymmetricImplD

View File

@@ -0,0 +1 @@
#define IMPLEMENTATION SpWilsonTwoIndexSymmetricImplF

View File

@@ -10,12 +10,18 @@ WILSON_IMPL_LIST=" \
WilsonImplF \
WilsonImplD \
WilsonImplD2 \
SpWilsonImplF \
SpWilsonImplD \
WilsonAdjImplF \
WilsonAdjImplD \
WilsonTwoIndexSymmetricImplF \
WilsonTwoIndexSymmetricImplD \
WilsonTwoIndexAntiSymmetricImplF \
WilsonTwoIndexAntiSymmetricImplD \
SpWilsonTwoIndexAntiSymmetricImplF \
SpWilsonTwoIndexAntiSymmetricImplD \
SpWilsonTwoIndexSymmetricImplF \
SpWilsonTwoIndexSymmetricImplD \
GparityWilsonImplF \
GparityWilsonImplD "

View File

@@ -39,6 +39,9 @@ NAMESPACE_BEGIN(Grid);
typedef WilsonGaugeAction<PeriodicGimplR> WilsonGaugeActionR;
typedef WilsonGaugeAction<PeriodicGimplF> WilsonGaugeActionF;
typedef WilsonGaugeAction<PeriodicGimplD> WilsonGaugeActionD;
typedef WilsonGaugeAction<SpPeriodicGimplR> SpWilsonGaugeActionR;
typedef WilsonGaugeAction<SpPeriodicGimplF> SpWilsonGaugeActionF;
typedef WilsonGaugeAction<SpPeriodicGimplD> SpWilsonGaugeActionD;
typedef PlaqPlusRectangleAction<PeriodicGimplR> PlaqPlusRectangleActionR;
typedef PlaqPlusRectangleAction<PeriodicGimplF> PlaqPlusRectangleActionF;
typedef PlaqPlusRectangleAction<PeriodicGimplD> PlaqPlusRectangleActionD;

View File

@@ -61,7 +61,7 @@ NAMESPACE_BEGIN(Grid);
typedef typename Impl::Field Field;
// hardcodes the exponential approximation in the template
template <class S, int Nrepresentation = Nc, int Nexp = 12 > class GaugeImplTypes {
template <class S, int Nrepresentation = Nc, int Nexp = 12, class Group = SU<Nc> > class GaugeImplTypes {
public:
typedef S Simd;
typedef typename Simd::scalar_type scalar_type;
@@ -78,8 +78,6 @@ public:
typedef Lattice<SiteLink> LinkField;
typedef Lattice<SiteField> Field;
typedef SU<Nrepresentation> Group;
// Guido: we can probably separate the types from the HMC functions
// this will create 2 kind of implementations
// probably confusing the users
@@ -119,6 +117,7 @@ public:
//
LinkField Pmu(P.Grid());
Pmu = Zero();
for (int mu = 0; mu < Nd; mu++) {
Group::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu);
RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR) ;
@@ -126,8 +125,12 @@ public:
PokeIndex<LorentzIndex>(P, Pmu, mu);
}
}
static inline Field projectForce(Field &P) { return Ta(P); }
static inline Field projectForce(Field &P) {
Field ret(P.Grid());
Group::taProj(P, ret);
return ret;
}
static inline void update_field(Field& P, Field& U, double ep){
//static std::chrono::duration<double> diff;
@@ -137,14 +140,15 @@ public:
autoView(P_v,P,AcceleratorRead);
accelerator_for(ss, P.Grid()->oSites(),1,{
for (int mu = 0; mu < Nd; mu++) {
U_v[ss](mu) = ProjectOnGroup(Exponentiate(P_v[ss](mu), ep, Nexp) * U_v[ss](mu));
U_v[ss](mu) = Exponentiate(P_v[ss](mu), ep, Nexp) * U_v[ss](mu);
U_v[ss](mu) = Group::ProjectOnGeneralGroup(U_v[ss](mu));
}
});
//auto end = std::chrono::high_resolution_clock::now();
// diff += end - start;
// std::cout << "Time to exponentiate matrix " << diff.count() << " s\n";
}
static inline RealD FieldSquareNorm(Field& U){
LatticeComplex Hloc(U.Grid());
Hloc = Zero();
@@ -157,7 +161,7 @@ public:
}
static inline void Project(Field &U) {
ProjectSUn(U);
Group::ProjectOnSpecialGroup(U);
}
static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) {
@@ -171,6 +175,7 @@ public:
static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) {
Group::ColdConfiguration(pRNG, U);
}
};
@@ -178,10 +183,17 @@ typedef GaugeImplTypes<vComplex, Nc> GimplTypesR;
typedef GaugeImplTypes<vComplexF, Nc> GimplTypesF;
typedef GaugeImplTypes<vComplexD, Nc> GimplTypesD;
typedef GaugeImplTypes<vComplex, Nc, 12, Sp<Nc> > SpGimplTypesR;
typedef GaugeImplTypes<vComplexF, Nc, 12, Sp<Nc> > SpGimplTypesF;
typedef GaugeImplTypes<vComplexD, Nc, 12, Sp<Nc> > SpGimplTypesD;
typedef GaugeImplTypes<vComplex, SU<Nc>::AdjointDimension> GimplAdjointTypesR;
typedef GaugeImplTypes<vComplexF, SU<Nc>::AdjointDimension> GimplAdjointTypesF;
typedef GaugeImplTypes<vComplexD, SU<Nc>::AdjointDimension> GimplAdjointTypesD;
NAMESPACE_END(Grid);
#endif // GRID_GAUGE_IMPL_TYPES_H

View File

@@ -176,7 +176,7 @@ public:
return PeriodicBC::CshiftLink(Link,mu,shift);
}
static inline void setDirections(std::vector<int> &conjDirs) { _conjDirs=conjDirs; }
static inline void setDirections(const std::vector<int> &conjDirs) { _conjDirs=conjDirs; }
static inline std::vector<int> getDirections(void) { return _conjDirs; }
static inline bool isPeriodicGaugeField(void) { return false; }
};
@@ -193,6 +193,11 @@ typedef ConjugateGaugeImpl<GimplTypesR> ConjugateGimplR; // Real.. whichever pre
typedef ConjugateGaugeImpl<GimplTypesF> ConjugateGimplF; // Float
typedef ConjugateGaugeImpl<GimplTypesD> ConjugateGimplD; // Double
typedef PeriodicGaugeImpl<SpGimplTypesR> SpPeriodicGimplR; // Real.. whichever prec
typedef PeriodicGaugeImpl<SpGimplTypesF> SpPeriodicGimplF; // Float
typedef PeriodicGaugeImpl<SpGimplTypesD> SpPeriodicGimplD; // Double
NAMESPACE_END(Grid);
#endif

View File

@@ -43,7 +43,7 @@ public:
private:
RealD c_plaq;
RealD c_rect;
typename WilsonLoops<Gimpl>::StapleAndRectStapleAllWorkspace workspace;
public:
PlaqPlusRectangleAction(RealD b,RealD c): c_plaq(b),c_rect(c){};
@@ -79,27 +79,18 @@ public:
GridBase *grid = Umu.Grid();
std::vector<GaugeLinkField> U (Nd,grid);
std::vector<GaugeLinkField> U2(Nd,grid);
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
WilsonLoops<Gimpl>::RectStapleDouble(U2[mu],U[mu],mu);
}
std::vector<GaugeLinkField> RectStaple(Nd,grid), Staple(Nd,grid);
WilsonLoops<Gimpl>::StapleAndRectStapleAll(Staple, RectStaple, U, workspace);
GaugeLinkField dSdU_mu(grid);
GaugeLinkField staple(grid);
for (int mu=0; mu < Nd; mu++){
// Staple in direction mu
WilsonLoops<Gimpl>::Staple(staple,Umu,mu);
dSdU_mu = Ta(U[mu]*staple)*factor_p;
WilsonLoops<Gimpl>::RectStaple(Umu,staple,U2,U,mu);
dSdU_mu = dSdU_mu + Ta(U[mu]*staple)*factor_r;
dSdU_mu = Ta(U[mu]*Staple[mu])*factor_p;
dSdU_mu = dSdU_mu + Ta(U[mu]*RectStaple[mu])*factor_r;
PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
}

View File

@@ -38,91 +38,73 @@ NAMESPACE_BEGIN(Grid);
// cf. GeneralEvenOddRational.h for details
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<class ImplD, class ImplF, class ImplD2>
template<class ImplD, class ImplF>
class GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction : public GeneralEvenOddRatioRationalPseudoFermionAction<ImplD> {
private:
typedef typename ImplD2::FermionField FermionFieldD2;
typedef typename ImplD::FermionField FermionFieldD;
typedef typename ImplF::FermionField FermionFieldF;
FermionOperator<ImplD> & NumOpD;
FermionOperator<ImplD> & DenOpD;
FermionOperator<ImplD2> & NumOpD2;
FermionOperator<ImplD2> & DenOpD2;
FermionOperator<ImplF> & NumOpF;
FermionOperator<ImplF> & DenOpF;
Integer ReliableUpdateFreq;
protected:
//Action evaluation
//Allow derived classes to override the multishift CG
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, FermionFieldD &out){
#if 0
#if 1
SchurDifferentiableOperator<ImplD> schurOp(numerator ? NumOpD : DenOpD);
ConjugateGradientMultiShift<FermionFieldD> msCG(MaxIter, approx);
msCG(schurOp,in, out);
#else
SchurDifferentiableOperator<ImplD2> schurOpD2(numerator ? NumOpD2 : DenOpD2);
SchurDifferentiableOperator<ImplD> schurOpD(numerator ? NumOpD : DenOpD);
SchurDifferentiableOperator<ImplF> schurOpF(numerator ? NumOpF : DenOpF);
FermionFieldD2 inD2(NumOpD2.FermionRedBlackGrid());
FermionFieldD2 outD2(NumOpD2.FermionRedBlackGrid());
FermionFieldD inD(NumOpD.FermionRedBlackGrid());
FermionFieldD outD(NumOpD.FermionRedBlackGrid());
// Action better with higher precision?
ConjugateGradientMultiShiftMixedPrec<FermionFieldD2, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
precisionChange(inD2,in);
std::cout << "msCG single solve "<<norm2(inD2)<<" " <<norm2(in)<<std::endl;
msCG(schurOpD2, inD2, outD2);
precisionChange(out,outD2);
ConjugateGradientMultiShiftMixedPrec<FermionFieldD, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
msCG(schurOpD, in, out);
#endif
}
//Force evaluation
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, std::vector<FermionFieldD> &out_elems, FermionFieldD &out){
SchurDifferentiableOperator<ImplD2> schurOpD2(numerator ? NumOpD2 : DenOpD2);
SchurDifferentiableOperator<ImplF> schurOpF (numerator ? NumOpF : DenOpF);
SchurDifferentiableOperator<ImplD> schurOpD(numerator ? NumOpD : DenOpD);
SchurDifferentiableOperator<ImplF> schurOpF(numerator ? NumOpF : DenOpF);
FermionFieldD2 inD2(NumOpD2.FermionRedBlackGrid());
FermionFieldD2 outD2(NumOpD2.FermionRedBlackGrid());
std::vector<FermionFieldD2> out_elemsD2(out_elems.size(),NumOpD2.FermionRedBlackGrid());
ConjugateGradientMultiShiftMixedPrecCleanup<FermionFieldD2, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
precisionChange(inD2,in);
std::cout << "msCG in "<<norm2(inD2)<<" " <<norm2(in)<<std::endl;
msCG(schurOpD2, inD2, out_elemsD2, outD2);
precisionChange(out,outD2);
for(int i=0;i<out_elems.size();i++){
precisionChange(out_elems[i],out_elemsD2[i]);
}
FermionFieldD inD(NumOpD.FermionRedBlackGrid());
FermionFieldD outD(NumOpD.FermionRedBlackGrid());
std::vector<FermionFieldD> out_elemsD(out_elems.size(),NumOpD.FermionRedBlackGrid());
ConjugateGradientMultiShiftMixedPrecCleanup<FermionFieldD, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
msCG(schurOpD, in, out_elems, out);
}
//Allow derived classes to override the gauge import
virtual void ImportGauge(const typename ImplD::GaugeField &Ud){
typename ImplF::GaugeField Uf(NumOpF.GaugeGrid());
typename ImplD2::GaugeField Ud2(NumOpD2.GaugeGrid());
precisionChange(Uf, Ud);
precisionChange(Ud2, Ud);
std::cout << "Importing "<<norm2(Ud)<<" "<< norm2(Uf)<<" " << norm2(Ud2)<<std::endl;
std::cout << "Importing "<<norm2(Ud)<<" "<< norm2(Uf)<<" " <<std::endl;
NumOpD.ImportGauge(Ud);
DenOpD.ImportGauge(Ud);
NumOpF.ImportGauge(Uf);
DenOpF.ImportGauge(Uf);
NumOpD2.ImportGauge(Ud2);
DenOpD2.ImportGauge(Ud2);
}
public:
GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction(FermionOperator<ImplD> &_NumOpD, FermionOperator<ImplD> &_DenOpD,
FermionOperator<ImplF> &_NumOpF, FermionOperator<ImplF> &_DenOpF,
FermionOperator<ImplD2> &_NumOpD2, FermionOperator<ImplD2> &_DenOpD2,
const RationalActionParams & p, Integer _ReliableUpdateFreq
) : GeneralEvenOddRatioRationalPseudoFermionAction<ImplD>(_NumOpD, _DenOpD, p),
ReliableUpdateFreq(_ReliableUpdateFreq),
NumOpD(_NumOpD), DenOpD(_DenOpD),
NumOpF(_NumOpF), DenOpF(_DenOpF),
NumOpD2(_NumOpD2), DenOpD2(_DenOpD2)
NumOpF(_NumOpF), DenOpF(_DenOpF)
{}
virtual std::string action_name(){return "GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction";}

View File

@@ -67,9 +67,9 @@ NAMESPACE_BEGIN(Grid);
virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}
};
template<class Impl,class ImplF,class ImplD2>
template<class Impl,class ImplF>
class OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction
: public GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<Impl,ImplF,ImplD2> {
: public GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<Impl,ImplF> {
public:
typedef OneFlavourRationalParams Params;
private:
@@ -91,11 +91,9 @@ NAMESPACE_BEGIN(Grid);
FermionOperator<Impl> &_DenOp,
FermionOperator<ImplF> &_NumOpF,
FermionOperator<ImplF> &_DenOpF,
FermionOperator<ImplD2> &_NumOpD2,
FermionOperator<ImplD2> &_DenOpD2,
const Params & p, Integer ReliableUpdateFreq
) :
GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<Impl,ImplF,ImplD2>(_NumOp, _DenOp,_NumOpF, _DenOpF,_NumOpD2, _DenOpD2, transcribe(p),ReliableUpdateFreq){}
GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<Impl,ImplF>(_NumOp, _DenOp,_NumOpF, _DenOpF, transcribe(p),ReliableUpdateFreq){}
virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}
};

View File

@@ -207,20 +207,27 @@ NAMESPACE_BEGIN(Grid);
//X = (Mdag M)^-1 V^dag phi
//Y = (Mdag)^-1 V^dag phi
Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi
std::cout << GridLogMessage <<" Y "<<norm2(Y)<<std::endl;
X=Zero();
DerivativeSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi
std::cout << GridLogMessage <<" X "<<norm2(X)<<std::endl;
Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi
std::cout << GridLogMessage <<" Y "<<norm2(Y)<<std::endl;
// phi^dag V (Mdag M)^-1 dV^dag phi
Vpc.MpcDagDeriv(force , X, PhiOdd ); dSdU = force;
std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl;
// phi^dag dV (Mdag M)^-1 V^dag phi
Vpc.MpcDeriv(force , PhiOdd, X ); dSdU = dSdU+force;
std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl;
// - phi^dag V (Mdag M)^-1 Mdag dM (Mdag M)^-1 V^dag phi
// - phi^dag V (Mdag M)^-1 dMdag M (Mdag M)^-1 V^dag phi
Mpc.MpcDeriv(force,Y,X); dSdU = dSdU-force;
std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl;
Mpc.MpcDagDeriv(force,X,Y); dSdU = dSdU-force;
std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl;
// FIXME No force contribution from EvenEven assumed here
// Needs a fix for clover.

View File

@@ -225,6 +225,18 @@ template <class RepresentationsPolicy,
using GenericHMCRunnerHirep =
HMCWrapperTemplate<PeriodicGimplR, Integrator, RepresentationsPolicy>;
// sp2n
template <template <typename, typename, typename> class Integrator>
using GenericSpHMCRunner = HMCWrapperTemplate<SpPeriodicGimplR, Integrator>;
template <class RepresentationsPolicy,
template <typename, typename, typename> class Integrator>
using GenericSpHMCRunnerHirep =
HMCWrapperTemplate<SpPeriodicGimplR, Integrator, RepresentationsPolicy>;
template <class Implementation, class RepresentationsPolicy,
template <typename, typename, typename> class Integrator>
using GenericHMCRunnerTemplate = HMCWrapperTemplate<Implementation, Integrator, RepresentationsPolicy>;

View File

@@ -283,12 +283,13 @@ public:
std::cout << GridLogHMC << "Total time for trajectory (s): " << (t1-t0)/1e6 << std::endl;
TheIntegrator.print_timer();
TheIntegrator.Smearer.set_Field(Ucur);
for (int obs = 0; obs < Observables.size(); obs++) {
std::cout << GridLogDebug << "Observables # " << obs << std::endl;
std::cout << GridLogDebug << "Observables total " << Observables.size() << std::endl;
std::cout << GridLogDebug << "Observables pointer " << Observables[obs] << std::endl;
Observables[obs]->TrajectoryComplete(traj + 1, Ucur, sRNG, pRNG);
Observables[obs]->TrajectoryComplete(traj + 1, TheIntegrator.Smearer, sRNG, pRNG);
}
std::cout << GridLogHMC << ":::::::::::::::::::::::::::::::::::::::::::" << std::endl;
}

View File

@@ -35,13 +35,16 @@ class CheckpointerParameters : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(CheckpointerParameters,
std::string, config_prefix,
std::string, smeared_prefix,
std::string, rng_prefix,
int, saveInterval,
bool, saveSmeared,
std::string, format, );
CheckpointerParameters(std::string cf = "cfg", std::string rn = "rng",
CheckpointerParameters(std::string cf = "cfg", std::string sf="cfg_smr" , std::string rn = "rng",
int savemodulo = 1, const std::string &f = "IEEE64BIG")
: config_prefix(cf),
smeared_prefix(sf),
rng_prefix(rn),
saveInterval(savemodulo),
format(f){};
@@ -61,13 +64,21 @@ template <class Impl>
class BaseHmcCheckpointer : public HmcObservable<typename Impl::Field> {
public:
void build_filenames(int traj, CheckpointerParameters &Params,
std::string &conf_file, std::string &rng_file) {
std::string &conf_file,
std::string &smear_file,
std::string &rng_file) {
{
std::ostringstream os;
os << Params.rng_prefix << "." << traj;
rng_file = os.str();
}
{
std::ostringstream os;
os << Params.smeared_prefix << "." << traj;
smear_file = os.str();
}
{
std::ostringstream os;
os << Params.config_prefix << "." << traj;
@@ -84,6 +95,11 @@ public:
}
virtual void initialize(const CheckpointerParameters &Params) = 0;
virtual void TrajectoryComplete(int traj,
typename Impl::Field &U,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG) { assert(0); } ; // HMC should pass the smart config with smeared and unsmeared
virtual void CheckpointRestore(int traj, typename Impl::Field &U,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG) = 0;

View File

@@ -61,11 +61,14 @@ public:
fout.close();
}
void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) {
void TrajectoryComplete(int traj,
ConfigurationBase<Field> &SmartConfig,
GridSerialRNG &sRNG, GridParallelRNG &pRNG)
{
if ((traj % Params.saveInterval) == 0) {
std::string config, rng;
this->build_filenames(traj, Params, config, rng);
std::string config, rng, smr;
this->build_filenames(traj, Params, config, smr, rng);
uint32_t nersc_csum;
uint32_t scidac_csuma;
@@ -74,9 +77,15 @@ public:
BinarySimpleUnmunger<sobj_double, sobj> munge;
truncate(rng);
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
truncate(config);
std::cout << GridLogMessage << "Written Binary RNG " << rng
<< " checksum " << std::hex
<< nersc_csum <<"/"
<< scidac_csuma <<"/"
<< scidac_csumb
<< std::dec << std::endl;
BinaryIO::writeLatticeObject<vobj, sobj_double>(U, config, munge, 0, Params.format,
truncate(config);
BinaryIO::writeLatticeObject<vobj, sobj_double>(SmartConfig.get_U(false), config, munge, 0, Params.format,
nersc_csum,scidac_csuma,scidac_csumb);
std::cout << GridLogMessage << "Written Binary Configuration " << config
@@ -85,6 +94,18 @@ public:
<< scidac_csuma <<"/"
<< scidac_csumb
<< std::dec << std::endl;
if ( Params.saveSmeared ) {
truncate(smr);
BinaryIO::writeLatticeObject<vobj, sobj_double>(SmartConfig.get_U(true), smr, munge, 0, Params.format,
nersc_csum,scidac_csuma,scidac_csumb);
std::cout << GridLogMessage << "Written Binary Smeared Configuration " << smr
<< " checksum " << std::hex
<< nersc_csum <<"/"
<< scidac_csuma <<"/"
<< scidac_csumb
<< std::dec << std::endl;
}
}
};

View File

@@ -69,17 +69,27 @@ public:
}
}
void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG,
void TrajectoryComplete(int traj,
ConfigurationBase<GaugeField> &SmartConfig,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG) {
if ((traj % Params.saveInterval) == 0) {
std::string config, rng;
std::string config, rng, smr;
this->build_filenames(traj, Params, config, rng);
GridBase *grid = U.Grid();
GridBase *grid = SmartConfig.get_U(false).Grid();
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
std::cout << GridLogMessage << "Written BINARY RNG " << rng
<< " checksum " << std::hex
<< nersc_csum<<"/"
<< scidac_csuma<<"/"
<< scidac_csumb
<< std::dec << std::endl;
IldgWriter _IldgWriter(grid->IsBoss());
_IldgWriter.open(config);
_IldgWriter.writeConfiguration<GaugeStats>(U, traj, config, config);
_IldgWriter.writeConfiguration<GaugeStats>(SmartConfig.get_U(false), traj, config, config);
_IldgWriter.close();
std::cout << GridLogMessage << "Written ILDG Configuration on " << config
@@ -88,6 +98,21 @@ public:
<< scidac_csuma<<"/"
<< scidac_csumb
<< std::dec << std::endl;
if ( Params.saveSmeared ) {
IldgWriter _IldgWriter(grid->IsBoss());
_IldgWriter.open(smr);
_IldgWriter.writeConfiguration<GaugeStats>(SmartConfig.get_U(true), traj, config, config);
_IldgWriter.close();
std::cout << GridLogMessage << "Written ILDG Configuration on " << smr
<< " checksum " << std::hex
<< nersc_csum<<"/"
<< scidac_csuma<<"/"
<< scidac_csumb
<< std::dec << std::endl;
}
}
};

View File

@@ -52,23 +52,29 @@ public:
Params.format = "IEEE64BIG"; // fixed, overwrite any other choice
}
void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG,
GridParallelRNG &pRNG) {
virtual void TrajectoryComplete(int traj,
ConfigurationBase<GaugeField> &SmartConfig,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG)
{
if ((traj % Params.saveInterval) == 0) {
std::string config, rng;
this->build_filenames(traj, Params, config, rng);
std::string config, rng, smr;
this->build_filenames(traj, Params, config, smr, rng);
int precision32 = 1;
int tworow = 0;
NerscIO::writeRNGState(sRNG, pRNG, rng);
NerscIO::writeConfiguration<GaugeStats>(U, config, tworow, precision32);
NerscIO::writeConfiguration<GaugeStats>(SmartConfig.get_U(false), config, tworow, precision32);
if ( Params.saveSmeared ) {
NerscIO::writeConfiguration<GaugeStats>(SmartConfig.get_U(true), smr, tworow, precision32);
}
}
};
void CheckpointRestore(int traj, GaugeField &U, GridSerialRNG &sRNG,
GridParallelRNG &pRNG) {
std::string config, rng;
this->build_filenames(traj, Params, config, rng);
std::string config, rng, smr;
this->build_filenames(traj, Params, config, smr, rng );
this->check_filename(rng);
this->check_filename(config);

View File

@@ -70,19 +70,37 @@ class ScidacHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
}
}
void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG,
void TrajectoryComplete(int traj,
ConfigurationBase<Field> &SmartConfig,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG) {
if ((traj % Params.saveInterval) == 0) {
std::string config, rng;
this->build_filenames(traj, Params, config, rng);
GridBase *grid = U.Grid();
std::string config, rng,smr;
this->build_filenames(traj, Params, config, smr, rng);
GridBase *grid = SmartConfig.get_U(false).Grid();
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
ScidacWriter _ScidacWriter(grid->IsBoss());
_ScidacWriter.open(config);
_ScidacWriter.writeScidacFieldRecord(U, MData);
_ScidacWriter.close();
std::cout << GridLogMessage << "Written Binary RNG " << rng
<< " checksum " << std::hex
<< nersc_csum <<"/"
<< scidac_csuma <<"/"
<< scidac_csumb
<< std::dec << std::endl;
{
ScidacWriter _ScidacWriter(grid->IsBoss());
_ScidacWriter.open(config);
_ScidacWriter.writeScidacFieldRecord(SmartConfig.get_U(false), MData);
_ScidacWriter.close();
}
if ( Params.saveSmeared ) {
ScidacWriter _ScidacWriter(grid->IsBoss());
_ScidacWriter.open(smr);
_ScidacWriter.writeScidacFieldRecord(SmartConfig.get_U(true), MData);
_ScidacWriter.close();
}
std::cout << GridLogMessage << "Written Scidac Configuration on " << config << std::endl;
}
};

View File

@@ -66,6 +66,7 @@ public:
template <class FieldImplementation_, class SmearingPolicy, class RepresentationPolicy>
class Integrator {
protected:
public:
typedef FieldImplementation_ FieldImplementation;
typedef typename FieldImplementation::Field MomentaField; //for readability
typedef typename FieldImplementation::Field Field;
@@ -86,6 +87,8 @@ protected:
const ActionSet<Field, RepresentationPolicy> as;
ActionSet<Field,RepresentationPolicy> LevelForces;
//Get a pointer to a shared static instance of the "do-nothing" momentum filter to serve as a default
static MomentumFilterBase<MomentaField> const* getDefaultMomFilter(){
static MomentumFilterNone<MomentaField> filter;
@@ -96,7 +99,6 @@ protected:
{
t_P[level] += ep;
update_P(P, U, level, ep);
std::cout << GridLogIntegrator << "[" << level << "] P " << " dt " << ep << " : t_P " << t_P[level] << std::endl;
}
@@ -124,35 +126,33 @@ protected:
// input U actually not used in the fundamental case
// Fundamental updates, include smearing
assert(as.size()==LevelForces.size());
Field level_force(U.Grid()); level_force =Zero();
for (int a = 0; a < as[level].actions.size(); ++a) {
double start_full = usecond();
Field force(U.Grid());
conformable(U.Grid(), Mom.Grid());
Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared);
double start_force = usecond();
std::cout << GridLogMessage << "AuditForce["<<level<<"]["<<a<<"] before"<<std::endl;
as[level].actions.at(a)->deriv_timer_start();
as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta
as[level].actions.at(a)->deriv(Smearer, force); // deriv should NOT include Ta
as[level].actions.at(a)->deriv_timer_stop();
std::cout << GridLogMessage << "AuditForce["<<level<<"]["<<a<<"] after"<<std::endl;
std::cout << GridLogIntegrator << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared << std::endl;
auto name = as[level].actions.at(a)->action_name();
if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force);
force = FieldImplementation::projectForce(force); // Ta for gauge fields
double end_force = usecond();
// DumpSliceNorm("force ",force,Nd-1);
MomFilter->applyFilter(force);
std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<<a <<"] "<<name<<" dt "<<ep<< std::endl;
DumpSliceNorm("force filtered ",force,Nd-1);
MomFilter->applyFilter(force);
std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<<a <<"] "<<name<<" dt "<<ep<< std::endl;
// track the total
level_force = level_force+force;
Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x])
Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;
@@ -175,6 +175,16 @@ protected:
}
{
// total force
Real force_abs = std::sqrt(norm2(level_force)/U.Grid()->gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x])
Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;
Real force_max = std::sqrt(maxLocalNorm2(level_force));
Real impulse_max = force_max * ep * HMC_MOMENTUM_DENOMINATOR;
LevelForces[level].actions.at(0)->deriv_log(force_abs,force_max,impulse_abs,impulse_max);
}
// Force from the other representations
as[level].apply(update_P_hireps, Representations, Mom, U, ep);
@@ -224,6 +234,16 @@ public:
//Default the momentum filter to "do-nothing"
MomFilter = getDefaultMomFilter();
for (int level = 0; level < as.size(); ++level) {
int multiplier = as.at(level).multiplier;
ActionLevel<Field> * Level = new ActionLevel<Field>(multiplier);
Level->push_back(new EmptyAction<Field>);
LevelForces.push_back(*Level);
// does it copy by value or reference??
// - answer it copies by value, BUT the action level contains a reference that is NOT updated.
// Unsafe code in Guido's area
}
};
virtual ~Integrator() {}
@@ -241,10 +261,14 @@ public:
void reset_timer(void)
{
assert(as.size()==LevelForces.size());
for (int level = 0; level < as.size(); ++level) {
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
as[level].actions.at(actionID)->reset_timer();
}
int actionID=0;
assert(LevelForces.at(level).actions.size()==1);
LevelForces.at(level).actions.at(actionID)->reset_timer();
}
}
void print_timer(void)
@@ -306,6 +330,16 @@ public:
<<" calls " << as[level].actions.at(actionID)->deriv_num
<< std::endl;
}
int actionID=0;
std::cout << GridLogMessage
<< LevelForces[level].actions.at(actionID)->action_name()
<<"["<<level<<"]["<< actionID<<"] :\n\t\t "
<<" force max " << LevelForces[level].actions.at(actionID)->deriv_max_average()
<<" norm " << LevelForces[level].actions.at(actionID)->deriv_norm_average()
<<" Fdt max " << LevelForces[level].actions.at(actionID)->Fdt_max_average()
<<" Fdt norm " << LevelForces[level].actions.at(actionID)->Fdt_norm_average()
<<" calls " << LevelForces[level].actions.at(actionID)->deriv_num
<< std::endl;
}
std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::"<< std::endl;
}
@@ -327,6 +361,13 @@ public:
std::cout << as[level].actions.at(actionID)->LogParameters();
}
}
std::cout << " [Integrator] Total Force loggers: "<< LevelForces.size() <<std::endl;
for (int level = 0; level < LevelForces.size(); ++level) {
std::cout << GridLogMessage << "[Integrator] ---- Level: "<< level << std::endl;
for (int actionID = 0; actionID < LevelForces[level].actions.size(); ++actionID) {
std::cout << GridLogMessage << "["<< LevelForces[level].actions.at(actionID)->action_name() << "] ID: " << actionID << std::endl;
}
}
std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::"<< std::endl;
}
@@ -377,14 +418,9 @@ public:
auto name = as[level].actions.at(actionID)->action_name();
std::cout << GridLogMessage << "refresh [" << level << "][" << actionID << "] "<<name << std::endl;
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
std::cout << GridLogMessage << "AuditRefresh["<<level<<"]["<<actionID<<"] before"<<std::endl;
as[level].actions.at(actionID)->refresh_timer_start();
as[level].actions.at(actionID)->refresh(Us, sRNG, pRNG);
as[level].actions.at(actionID)->refresh(Smearer, sRNG, pRNG);
as[level].actions.at(actionID)->refresh_timer_stop();
std::cout << GridLogMessage << "AuditRefresh["<<level<<"]["<<actionID<<"] after"<<std::endl;
}
@@ -413,6 +449,7 @@ public:
RealD S(Field& U)
{ // here also U not used
assert(as.size()==LevelForces.size());
std::cout << GridLogIntegrator << "Integrator action\n";
RealD H = - FieldImplementation::FieldSquareNorm(P)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom
@@ -425,10 +462,9 @@ public:
// get gauge field from the SmearingPolicy and
// based on the boolean is_smeared in actionID
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
as[level].actions.at(actionID)->S_timer_start();
Hterm = as[level].actions.at(actionID)->S(Us);
Hterm = as[level].actions.at(actionID)->S(Smearer);
as[level].actions.at(actionID)->S_timer_stop();
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
H += Hterm;
@@ -469,12 +505,11 @@ public:
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
// get gauge field from the SmearingPolicy and
// based on the boolean is_smeared in actionID
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
as[level].actions.at(actionID)->S_timer_start();
Hterm = as[level].actions.at(actionID)->Sinitial(Us);
as[level].actions.at(actionID)->S_timer_stop();
as[level].actions.at(actionID)->S_timer_start();
Hterm = as[level].actions.at(actionID)->S(Smearer);
as[level].actions.at(actionID)->S_timer_stop();
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
H += Hterm;

View File

@@ -34,6 +34,13 @@ NAMESPACE_BEGIN(Grid);
template <class Field>
class HmcObservable {
public:
virtual void TrajectoryComplete(int traj,
ConfigurationBase<Field> &SmartConfig,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG)
{
TrajectoryComplete(traj,SmartConfig.get_U(false),sRNG,pRNG); // Unsmeared observable
};
virtual void TrajectoryComplete(int traj,
Field &U,
GridSerialRNG &sRNG,

View File

@@ -42,6 +42,18 @@ public:
// necessary for HmcObservable compatibility
typedef typename Impl::Field Field;
virtual void TrajectoryComplete(int traj,
ConfigurationBase<Field> &SmartConfig,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG)
{
std::cout << GridLogMessage << "+++++++++++++++++++"<<std::endl;
std::cout << GridLogMessage << "Unsmeared plaquette"<<std::endl;
TrajectoryComplete(traj,SmartConfig.get_U(false),sRNG,pRNG); // Unsmeared observable
std::cout << GridLogMessage << "Smeared plaquette"<<std::endl;
TrajectoryComplete(traj,SmartConfig.get_U(true),sRNG,pRNG); // Unsmeared observable
std::cout << GridLogMessage << "+++++++++++++++++++"<<std::endl;
};
void TrajectoryComplete(int traj,
Field &U,
GridSerialRNG &sRNG,

View File

@@ -13,7 +13,7 @@ NAMESPACE_BEGIN(Grid);
* Empty since HMC updates already the fundamental representation
*/
template <int ncolour>
template <int ncolour, class group_name>
class FundamentalRep {
public:
static const int Dimension = ncolour;
@@ -21,7 +21,7 @@ public:
// typdef to be used by the Representations class in HMC to get the
// types for the higher representation fields
typedef typename SU<ncolour>::LatticeMatrix LatticeMatrix;
typedef typename GaugeGroup<ncolour,group_name>::LatticeMatrix LatticeMatrix;
typedef LatticeGaugeField LatticeField;
explicit FundamentalRep(GridBase* grid) {} //do nothing
@@ -45,7 +45,8 @@ public:
typedef FundamentalRep<Nc> FundamentalRepresentation;
typedef FundamentalRep<Nc,GroupName::SU> FundamentalRepresentation;
typedef FundamentalRep<Nc,GroupName::Sp> SpFundamentalRepresentation;
NAMESPACE_END(Grid);

View File

@@ -20,14 +20,14 @@ NAMESPACE_BEGIN(Grid);
* in the SUnTwoIndex.h file
*/
template <int ncolour, TwoIndexSymmetry S>
template <int ncolour, TwoIndexSymmetry S, class group_name = GroupName::SU>
class TwoIndexRep {
public:
// typdef to be used by the Representations class in HMC to get the
// types for the higher representation fields
typedef typename SU_TwoIndex<ncolour, S>::LatticeTwoIndexMatrix LatticeMatrix;
typedef typename SU_TwoIndex<ncolour, S>::LatticeTwoIndexField LatticeField;
static const int Dimension = ncolour * (ncolour + S) / 2;
typedef typename GaugeGroupTwoIndex<ncolour, S, group_name>::LatticeTwoIndexMatrix LatticeMatrix;
typedef typename GaugeGroupTwoIndex<ncolour, S, group_name>::LatticeTwoIndexField LatticeField;
static const int Dimension = GaugeGroupTwoIndex<ncolour,S,group_name>::Dimension;
static const bool isFundamental = false;
LatticeField U;
@@ -43,10 +43,10 @@ public:
U = Zero();
LatticeColourMatrix tmp(Uin.Grid());
Vector<typename SU<ncolour>::Matrix> eij(Dimension);
Vector<typename GaugeGroup<ncolour,group_name>::Matrix> eij(Dimension);
for (int a = 0; a < Dimension; a++)
SU_TwoIndex<ncolour, S>::base(a, eij[a]);
GaugeGroupTwoIndex<ncolour, S, group_name>::base(a, eij[a]);
for (int mu = 0; mu < Nd; mu++) {
auto Uin_mu = peekLorentz(Uin, mu);
@@ -71,7 +71,7 @@ public:
out_mu = Zero();
typename SU<ncolour>::LatticeAlgebraVector h(in.Grid());
typename GaugeGroup<ncolour, group_name>::LatticeAlgebraVector h(in.Grid());
projectOnAlgebra(h, in_mu, double(Nc + 2 * S)); // factor T(r)/T(fund)
FundamentalLieAlgebraMatrix(h, out_mu); // apply scale only once
pokeLorentz(out, out_mu, mu);
@@ -80,20 +80,23 @@ public:
}
private:
void projectOnAlgebra(typename SU<ncolour>::LatticeAlgebraVector &h_out,
void projectOnAlgebra(typename GaugeGroup<ncolour, group_name>::LatticeAlgebraVector &h_out,
const LatticeMatrix &in, Real scale = 1.0) const {
SU_TwoIndex<ncolour, S>::projectOnAlgebra(h_out, in, scale);
GaugeGroupTwoIndex<ncolour, S,group_name>::projectOnAlgebra(h_out, in, scale);
}
void FundamentalLieAlgebraMatrix(
typename SU<ncolour>::LatticeAlgebraVector &h,
typename SU<ncolour>::LatticeMatrix &out, Real scale = 1.0) const {
SU<ncolour>::FundamentalLieAlgebraMatrix(h, out, scale);
typename GaugeGroup<ncolour, group_name>::LatticeAlgebraVector &h,
typename GaugeGroup<ncolour, group_name>::LatticeMatrix &out, Real scale = 1.0) const {
GaugeGroup<ncolour,group_name>::FundamentalLieAlgebraMatrix(h, out, scale);
}
};
typedef TwoIndexRep<Nc, Symmetric> TwoIndexSymmetricRepresentation;
typedef TwoIndexRep<Nc, AntiSymmetric> TwoIndexAntiSymmetricRepresentation;
typedef TwoIndexRep<Nc, Symmetric, GroupName::SU> TwoIndexSymmetricRepresentation;
typedef TwoIndexRep<Nc, AntiSymmetric, GroupName::SU> TwoIndexAntiSymmetricRepresentation;
typedef TwoIndexRep<Nc, Symmetric, GroupName::Sp> SpTwoIndexSymmetricRepresentation;
typedef TwoIndexRep<Nc, AntiSymmetric, GroupName::Sp> SpTwoIndexAntiSymmetricRepresentation;
NAMESPACE_END(Grid);

View File

@@ -7,26 +7,27 @@
NAMESPACE_BEGIN(Grid);
//trivial class for no smearing
template< class Impl >
class NoSmearing
class NoSmearing : public ConfigurationBase<typename Impl::Field>
{
public:
INHERIT_FIELD_TYPES(Impl);
Field* ThinField;
Field* ThinLinks;
NoSmearing(): ThinField(NULL) {}
NoSmearing(): ThinLinks(NULL) {}
void set_Field(Field& U) { ThinField = &U; }
virtual void set_Field(Field& U) { ThinLinks = &U; }
void smeared_force(Field&) const {}
virtual void smeared_force(Field&) {}
Field& get_SmearedU() { return *ThinField; }
virtual Field& get_SmearedU() { return *ThinLinks; }
Field &get_U(bool smeared = false)
virtual Field &get_U(bool smeared = false)
{
return *ThinField;
return *ThinLinks;
}
};
@@ -42,19 +43,24 @@ public:
It stores a list of smeared configurations.
*/
template <class Gimpl>
class SmearedConfiguration
class SmearedConfiguration : public ConfigurationBase<typename Gimpl::Field>
{
public:
INHERIT_GIMPL_TYPES(Gimpl);
private:
protected:
const unsigned int smearingLevels;
Smear_Stout<Gimpl> *StoutSmearing;
std::vector<GaugeField> SmearedSet;
public:
GaugeField* ThinLinks; /* Pointer to the thin links configuration */ // move to base???
protected:
// Member functions
//====================================================================
void fill_smearedSet(GaugeField &U)
// Overridden in masked version
virtual void fill_smearedSet(GaugeField &U)
{
ThinLinks = &U; // attach the smearing routine to the field U
@@ -82,9 +88,10 @@ private:
}
}
}
//====================================================================
GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime,
const GaugeField& GaugeK) const
//overridden in masked verson
virtual GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime,
const GaugeField& GaugeK) const
{
GridBase* grid = GaugeK.Grid();
GaugeField C(grid), SigmaK(grid), iLambda(grid);
@@ -213,8 +220,6 @@ private:
//====================================================================
public:
GaugeField*
ThinLinks; /* Pointer to the thin links configuration */
/* Standard constructor */
SmearedConfiguration(GridCartesian* UGrid, unsigned int Nsmear,
@@ -230,7 +235,7 @@ public:
: smearingLevels(0), StoutSmearing(nullptr), SmearedSet(), ThinLinks(NULL) {}
// attach the smeared routines to the thin links U and fill the smeared set
void set_Field(GaugeField &U)
virtual void set_Field(GaugeField &U)
{
double start = usecond();
fill_smearedSet(U);
@@ -240,7 +245,7 @@ public:
}
//====================================================================
void smeared_force(GaugeField &SigmaTilde) const
virtual void smeared_force(GaugeField &SigmaTilde)
{
if (smearingLevels > 0)
{
@@ -267,14 +272,16 @@ public:
}
double end = usecond();
double time = (end - start)/ 1e3;
std::cout << GridLogMessage << "Smearing force in " << time << " ms" << std::endl;
std::cout << GridLogMessage << " GaugeConfiguration: Smeared Force chain rule took " << time << " ms" << std::endl;
} // if smearingLevels = 0 do nothing
SigmaTilde=Gimpl::projectForce(SigmaTilde); // Ta
}
//====================================================================
GaugeField& get_SmearedU() { return SmearedSet[smearingLevels - 1]; }
virtual GaugeField& get_SmearedU() { return SmearedSet[smearingLevels - 1]; }
GaugeField &get_U(bool smeared = false)
virtual GaugeField &get_U(bool smeared = false)
{
// get the config, thin links by default
if (smeared)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,87 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/gauge/JacobianAction.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////////////////////////
// Jacobian Action ..
////////////////////////////////////////////////////////////////////////
template <class Gimpl>
class JacobianAction : public Action<typename Gimpl::GaugeField> {
public:
INHERIT_GIMPL_TYPES(Gimpl);
SmearedConfigurationMasked<Gimpl> * smearer;
/////////////////////////// constructors
explicit JacobianAction(SmearedConfigurationMasked<Gimpl> * _smearer ) { smearer=_smearer;};
virtual std::string action_name() {return "JacobianAction";}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "[JacobianAction] " << std::endl;
return sstream.str();
}
//////////////////////////////////
// Usual cases are not used
//////////////////////////////////
virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG){ assert(0);};
virtual RealD S(const GaugeField &U) { assert(0); }
virtual void deriv(const GaugeField &U, GaugeField &dSdU) { assert(0); }
//////////////////////////////////
// Functions of smart configs only
//////////////////////////////////
virtual void refresh(ConfigurationBase<GaugeField> & U, GridSerialRNG &sRNG, GridParallelRNG& pRNG)
{
return;
}
virtual RealD S(ConfigurationBase<GaugeField>& U)
{
// det M = e^{ - ( - logDetM) }
assert( &U == smearer );
return -smearer->logDetJacobian();
}
virtual RealD Sinitial(ConfigurationBase<GaugeField>& U)
{
return S(U);
}
virtual void deriv(ConfigurationBase<GaugeField>& U, GaugeField& dSdU)
{
assert( &U == smearer );
smearer->logDetJacobianForce(dSdU);
}
private:
};
NAMESPACE_END(Grid);

View File

@@ -40,7 +40,9 @@ template <class Gimpl>
class Smear_Stout : public Smear<Gimpl> {
private:
int OrthogDim = -1;
public:
const std::vector<double> SmearRho;
private:
// Smear<Gimpl>* ownership semantics:
// Smear<Gimpl>* passed in to constructor are owned by caller, so we don't delete them here
// Smear<Gimpl>* created within constructor need to be deleted as part of the destructor
@@ -67,7 +69,7 @@ public:
/*! Construct stout smearing object from explicitly specified rho matrix */
Smear_Stout(const std::vector<double>& rho_)
: OwnedBase{new Smear_APE<Gimpl>(rho_)}, SmearBase{OwnedBase.get()} {
std::cout << GridLogDebug << "Stout smearing constructor : Smear_Stout(const std::vector<double>& " << rho_ << " )" << std::endl
std::cout << GridLogDebug << "Stout smearing constructor : Smear_Stout(const std::vector<double>& " << rho_ << " )" << std::endl;
assert(Nc == 3 && "Stout smearing currently implemented only for Nc==3");
}

View File

@@ -37,13 +37,14 @@ NAMESPACE_BEGIN(Grid);
// Make these members of an Impl class for BC's.
namespace PeriodicBC {
//Out(x) = Link(x)*field(x+mu)
template<class covariant,class gauge> Lattice<covariant> CovShiftForward(const Lattice<gauge> &Link,
int mu,
const Lattice<covariant> &field)
{
return Link*Cshift(field,mu,1);// moves towards negative mu
}
//Out(x) = Link^dag(x-mu)*field(x-mu)
template<class covariant,class gauge> Lattice<covariant> CovShiftBackward(const Lattice<gauge> &Link,
int mu,
const Lattice<covariant> &field)
@@ -52,19 +53,19 @@ namespace PeriodicBC {
tmp = adj(Link)*field;
return Cshift(tmp,mu,-1);// moves towards positive mu
}
//Out(x) = Link^dag(x-mu)
template<class gauge> Lattice<gauge>
CovShiftIdentityBackward(const Lattice<gauge> &Link, int mu)
{
return Cshift(adj(Link), mu, -1);
}
//Out(x) = Link(x)
template<class gauge> Lattice<gauge>
CovShiftIdentityForward(const Lattice<gauge> &Link, int mu)
{
return Link;
}
//Link(x) = Link(x+mu)
template<class gauge> Lattice<gauge>
ShiftStaple(const Lattice<gauge> &Link, int mu)
{

528
Grid/qcd/utils/GaugeGroup.h Normal file
View File

@@ -0,0 +1,528 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/utils/GaugeGroup.h
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef QCD_UTIL_GAUGEGROUP_H
#define QCD_UTIL_GAUGEGROUP_H
// Important detail: nvcc requires all template parameters to have names.
// This is the only reason why the second template parameter has a name.
#define ONLY_IF_SU \
typename dummy_name = group_name, \
typename named_dummy = std::enable_if_t < \
std::is_same<dummy_name, group_name>::value && \
is_su<dummy_name>::value >
#define ONLY_IF_Sp \
typename dummy_name = group_name, \
typename named_dummy = std::enable_if_t < \
std::is_same<dummy_name, group_name>::value && \
is_sp<dummy_name>::value >
NAMESPACE_BEGIN(Grid);
namespace GroupName {
class SU {};
class Sp {};
} // namespace GroupName
template <typename group_name>
struct is_su {
static const bool value = false;
};
template <>
struct is_su<GroupName::SU> {
static const bool value = true;
};
template <typename group_name>
struct is_sp {
static const bool value = false;
};
template <>
struct is_sp<GroupName::Sp> {
static const bool value = true;
};
template <typename group_name>
constexpr int compute_adjoint_dimension(int ncolour);
template <>
constexpr int compute_adjoint_dimension<GroupName::SU>(int ncolour) {
return ncolour * ncolour - 1;
}
template <>
constexpr int compute_adjoint_dimension<GroupName::Sp>(int ncolour) {
return ncolour / 2 * (ncolour + 1);
}
template <int ncolour, class group_name>
class GaugeGroup {
public:
static const int Dimension = ncolour;
static const int AdjointDimension =
compute_adjoint_dimension<group_name>(ncolour);
static const int AlgebraDimension =
compute_adjoint_dimension<group_name>(ncolour);
template <typename vtype>
using iSU2Matrix = iScalar<iScalar<iMatrix<vtype, 2> > >;
template <typename vtype>
using iGroupMatrix = iScalar<iScalar<iMatrix<vtype, ncolour> > >;
template <typename vtype>
using iAlgebraVector = iScalar<iScalar<iVector<vtype, AdjointDimension> > >;
template <typename vtype>
using iSUnAlgebraMatrix =
iScalar<iScalar<iMatrix<vtype, AdjointDimension> > >;
static int su2subgroups(void) { return su2subgroups(group_name()); }
//////////////////////////////////////////////////////////////////////////////////////////////////
// Types can be accessed as SU<2>::Matrix , SU<2>::vSUnMatrix,
// SU<2>::LatticeMatrix etc...
//////////////////////////////////////////////////////////////////////////////////////////////////
typedef iGroupMatrix<Complex> Matrix;
typedef iGroupMatrix<ComplexF> MatrixF;
typedef iGroupMatrix<ComplexD> MatrixD;
typedef iGroupMatrix<vComplex> vMatrix;
typedef iGroupMatrix<vComplexF> vMatrixF;
typedef iGroupMatrix<vComplexD> vMatrixD;
// For the projectors to the algebra
// these should be real...
// keeping complex for consistency with the SIMD vector types
typedef iAlgebraVector<Complex> AlgebraVector;
typedef iAlgebraVector<ComplexF> AlgebraVectorF;
typedef iAlgebraVector<ComplexD> AlgebraVectorD;
typedef iAlgebraVector<vComplex> vAlgebraVector;
typedef iAlgebraVector<vComplexF> vAlgebraVectorF;
typedef iAlgebraVector<vComplexD> vAlgebraVectorD;
typedef Lattice<vMatrix> LatticeMatrix;
typedef Lattice<vMatrixF> LatticeMatrixF;
typedef Lattice<vMatrixD> LatticeMatrixD;
typedef Lattice<vAlgebraVector> LatticeAlgebraVector;
typedef Lattice<vAlgebraVectorF> LatticeAlgebraVectorF;
typedef Lattice<vAlgebraVectorD> LatticeAlgebraVectorD;
typedef iSUnAlgebraMatrix<vComplex> vAlgebraMatrix;
typedef iSUnAlgebraMatrix<vComplexF> vAlgebraMatrixF;
typedef iSUnAlgebraMatrix<vComplexD> vAlgebraMatrixD;
typedef Lattice<vAlgebraMatrix> LatticeAlgebraMatrix;
typedef Lattice<vAlgebraMatrixF> LatticeAlgebraMatrixF;
typedef Lattice<vAlgebraMatrixD> LatticeAlgebraMatrixD;
typedef iSU2Matrix<Complex> SU2Matrix;
typedef iSU2Matrix<ComplexF> SU2MatrixF;
typedef iSU2Matrix<ComplexD> SU2MatrixD;
typedef iSU2Matrix<vComplex> vSU2Matrix;
typedef iSU2Matrix<vComplexF> vSU2MatrixF;
typedef iSU2Matrix<vComplexD> vSU2MatrixD;
typedef Lattice<vSU2Matrix> LatticeSU2Matrix;
typedef Lattice<vSU2MatrixF> LatticeSU2MatrixF;
typedef Lattice<vSU2MatrixD> LatticeSU2MatrixD;
// Private implementation details are specified in the following files:
// Grid/qcd/utils/SUn.impl
// Grid/qcd/utils/SUn.impl
// The public part of the interface follows below and refers to these
// private member functions.
#include <Grid/qcd/utils/SUn.impl.h>
#include <Grid/qcd/utils/Sp2n.impl.h>
public:
template <class cplx>
static void generator(int lieIndex, iGroupMatrix<cplx> &ta) {
return generator(lieIndex, ta, group_name());
}
static accelerator_inline void su2SubGroupIndex(int &i1, int &i2, int su2_index) {
return su2SubGroupIndex(i1, i2, su2_index, group_name());
}
static void testGenerators(void) { testGenerators(group_name()); }
static void printGenerators(void) {
for (int gen = 0; gen < AlgebraDimension; gen++) {
Matrix ta;
generator(gen, ta);
std::cout << GridLogMessage << "Nc = " << ncolour << " t_" << gen
<< std::endl;
std::cout << GridLogMessage << ta << std::endl;
}
}
template <typename LatticeMatrixType>
static void LieRandomize(GridParallelRNG &pRNG, LatticeMatrixType &out,
double scale = 1.0) {
GridBase *grid = out.Grid();
typedef typename LatticeMatrixType::vector_type vector_type;
typedef iSinglet<vector_type> vTComplexType;
typedef Lattice<vTComplexType> LatticeComplexType;
typedef typename GridTypeMapper<
typename LatticeMatrixType::vector_object>::scalar_object MatrixType;
LatticeComplexType ca(grid);
LatticeMatrixType lie(grid);
LatticeMatrixType la(grid);
ComplexD ci(0.0, scale);
MatrixType ta;
lie = Zero();
for (int a = 0; a < AlgebraDimension; a++) {
random(pRNG, ca);
ca = (ca + conjugate(ca)) * 0.5;
ca = ca - 0.5;
generator(a, ta);
la = ci * ca * ta;
lie = lie + la; // e^{i la ta}
}
taExp(lie, out);
}
static void GaussianFundamentalLieAlgebraMatrix(GridParallelRNG &pRNG,
LatticeMatrix &out,
Real scale = 1.0) {
GridBase *grid = out.Grid();
LatticeReal ca(grid);
LatticeMatrix la(grid);
Complex ci(0.0, scale);
Matrix ta;
out = Zero();
for (int a = 0; a < AlgebraDimension; a++) {
gaussian(pRNG, ca);
generator(a, ta);
la = toComplex(ca) * ta;
out += la;
}
out *= ci;
}
static void FundamentalLieAlgebraMatrix(const LatticeAlgebraVector &h,
LatticeMatrix &out,
Real scale = 1.0) {
conformable(h, out);
GridBase *grid = out.Grid();
LatticeMatrix la(grid);
Matrix ta;
out = Zero();
for (int a = 0; a < AlgebraDimension; a++) {
generator(a, ta);
la = peekColour(h, a) * timesI(ta) * scale;
out += la;
}
}
// Projects the algebra components a lattice matrix (of dimension ncol*ncol -1
// ) inverse operation: FundamentalLieAlgebraMatrix
static void projectOnAlgebra(LatticeAlgebraVector &h_out,
const LatticeMatrix &in, Real scale = 1.0) {
conformable(h_out, in);
h_out = Zero();
Matrix Ta;
for (int a = 0; a < AlgebraDimension; a++) {
generator(a, Ta);
pokeColour(h_out, -2.0 * (trace(timesI(Ta) * in)) * scale, a);
}
}
template <class vtype>
accelerator_inline static iScalar<vtype> ProjectOnGeneralGroup(const iScalar<vtype> &r) {
return ProjectOnGeneralGroup(r, group_name());
}
template <class vtype, int N>
accelerator_inline static iVector<vtype,N> ProjectOnGeneralGroup(const iVector<vtype,N> &r) {
return ProjectOnGeneralGroup(r, group_name());
}
template <class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
accelerator_inline static iMatrix<vtype,N> ProjectOnGeneralGroup(const iMatrix<vtype,N> &arg) {
return ProjectOnGeneralGroup(arg, group_name());
}
template <int N,class vComplex_t> // Projects on the general groups U(N), Sp(2N)xZ2 i.e. determinant is allowed a complex phase.
static void ProjectOnGeneralGroup(Lattice<iVector<iScalar<iMatrix<vComplex_t, N> >, Nd> > &U) {
for (int mu = 0; mu < Nd; mu++) {
auto Umu = PeekIndex<LorentzIndex>(U, mu);
Umu = ProjectOnGeneralGroup(Umu);
}
}
template <int N,class vComplex_t>
static Lattice<iScalar<iScalar<iMatrix<vComplex_t, N> > > > ProjectOnGeneralGroup(const Lattice<iScalar<iScalar<iMatrix<vComplex_t, N> > > > &Umu) {
return ProjectOnGeneralGroup(Umu, group_name());
}
template <int N,class vComplex_t> // Projects on SU(N), Sp(2N), with unit determinant, by first projecting on general group and then enforcing unit determinant
static void ProjectOnSpecialGroup(Lattice<iScalar<iScalar<iMatrix<vComplex_t, N> > > > &Umu) {
Umu = ProjectOnGeneralGroup(Umu);
auto det = Determinant(Umu);
det = conjugate(det);
for (int i = 0; i < N; i++) {
auto element = PeekIndex<ColourIndex>(Umu, N - 1, i);
element = element * det;
PokeIndex<ColourIndex>(Umu, element, Nc - 1, i);
}
}
template <int N,class vComplex_t> // reunitarise, resimplectify... previously ProjectSUn
static void ProjectOnSpecialGroup(Lattice<iVector<iScalar<iMatrix<vComplex_t, N> >, Nd> > &U) {
// Reunitarise
for (int mu = 0; mu < Nd; mu++) {
auto Umu = PeekIndex<LorentzIndex>(U, mu);
ProjectOnSpecialGroup(Umu);
PokeIndex<LorentzIndex>(U, Umu, mu);
}
}
template <typename GaugeField>
static void HotConfiguration(GridParallelRNG &pRNG, GaugeField &out) {
typedef typename GaugeField::vector_type vector_type;
typedef iGroupMatrix<vector_type> vMatrixType;
typedef Lattice<vMatrixType> LatticeMatrixType;
LatticeMatrixType Umu(out.Grid());
LatticeMatrixType tmp(out.Grid());
for (int mu = 0; mu < Nd; mu++) {
// LieRandomize(pRNG, Umu, 1.0);
// PokeIndex<LorentzIndex>(out, Umu, mu);
gaussian(pRNG,Umu);
tmp = Ta(Umu);
taExp(tmp,Umu);
ProjectOnSpecialGroup(Umu);
// ProjectSUn(Umu);
PokeIndex<LorentzIndex>(out, Umu, mu);
}
}
template <typename GaugeField>
static void TepidConfiguration(GridParallelRNG &pRNG, GaugeField &out) {
typedef typename GaugeField::vector_type vector_type;
typedef iGroupMatrix<vector_type> vMatrixType;
typedef Lattice<vMatrixType> LatticeMatrixType;
LatticeMatrixType Umu(out.Grid());
for (int mu = 0; mu < Nd; mu++) {
LieRandomize(pRNG, Umu, 0.01);
PokeIndex<LorentzIndex>(out, Umu, mu);
}
}
template <typename GaugeField>
static void ColdConfiguration(GaugeField &out) {
typedef typename GaugeField::vector_type vector_type;
typedef iGroupMatrix<vector_type> vMatrixType;
typedef Lattice<vMatrixType> LatticeMatrixType;
LatticeMatrixType Umu(out.Grid());
Umu = 1.0;
for (int mu = 0; mu < Nd; mu++) {
PokeIndex<LorentzIndex>(out, Umu, mu);
}
}
template <typename GaugeField>
static void ColdConfiguration(GridParallelRNG &pRNG, GaugeField &out) {
ColdConfiguration(out);
}
template <typename LatticeMatrixType>
static void taProj(const LatticeMatrixType &in, LatticeMatrixType &out) {
taProj(in, out, group_name());
}
template <typename LatticeMatrixType>
static void taExp(const LatticeMatrixType &x, LatticeMatrixType &ex) {
typedef typename LatticeMatrixType::scalar_type ComplexType;
LatticeMatrixType xn(x.Grid());
RealD nfac = 1.0;
xn = x;
ex = xn + ComplexType(1.0); // 1+x
// Do a 12th order exponentiation
for (int i = 2; i <= 12; ++i) {
nfac = nfac / RealD(i); // 1/2, 1/2.3 ...
xn = xn * x; // x2, x3,x4....
ex = ex + xn * nfac; // x2/2!, x3/3!....
}
}
// Ta are hermitian (?)
// Anti herm is i Ta basis
static void LieAlgebraProject(LatticeAlgebraMatrix &out,const LatticeMatrix &in, int b)
{
conformable(in, out);
GridBase *grid = out.Grid();
LatticeComplex tmp(grid);
Matrix ta;
// Using Luchang's projection convention
// 2 Tr{Ta Tb} A_b= 2/2 delta ab A_b = A_a
autoView(out_v,out,AcceleratorWrite);
autoView(in_v,in,AcceleratorRead);
int N = ncolour;
int NNm1 = N * (N - 1);
int hNNm1= NNm1/2;
RealD sqrt_2 = sqrt(2.0);
Complex ci(0.0,1.0);
for(int su2Index=0;su2Index<hNNm1;su2Index++){
int i1, i2;
su2SubGroupIndex(i1, i2, su2Index);
int ax = su2Index*2;
int ay = su2Index*2+1;
accelerator_for(ss,grid->oSites(),1,{
// in is traceless ANTI-hermitian whereas Grid generators are Hermitian.
// trace( Ta x Ci in)
// Bet I need to move to real part with mult by -i
out_v[ss]()()(ax,b) = 0.5*(real(in_v[ss]()()(i2,i1)) - real(in_v[ss]()()(i1,i2)));
out_v[ss]()()(ay,b) = 0.5*(imag(in_v[ss]()()(i1,i2)) + imag(in_v[ss]()()(i2,i1)));
});
}
for(int diagIndex=0;diagIndex<N-1;diagIndex++){
int k = diagIndex + 1; // diagIndex starts from 0
int a = NNm1+diagIndex;
RealD scale = 1.0/sqrt(2.0*k*(k+1));
accelerator_for(ss,grid->oSites(),vComplex::Nsimd(),{
auto tmp = in_v[ss]()()(0,0);
for(int i=1;i<k;i++){
tmp=tmp+in_v[ss]()()(i,i);
}
tmp = tmp - in_v[ss]()()(k,k)*k;
out_v[ss]()()(a,b) =imag(tmp) * scale;
});
}
}
};
template <int ncolour>
using SU = GaugeGroup<ncolour, GroupName::SU>;
template <int ncolour>
using Sp = GaugeGroup<ncolour, GroupName::Sp>;
typedef SU<2> SU2;
typedef SU<3> SU3;
typedef SU<4> SU4;
typedef SU<5> SU5;
typedef SU<Nc> FundamentalMatrices;
typedef Sp<2> Sp2;
typedef Sp<4> Sp4;
typedef Sp<6> Sp6;
typedef Sp<8> Sp8;
template <int N,class vComplex_t>
static void ProjectSUn(Lattice<iScalar<iScalar<iMatrix<vComplex_t, N> > > > &Umu)
{
GaugeGroup<N,GroupName::SU>::ProjectOnSpecialGroup(Umu);
}
template <int N,class vComplex_t>
static void ProjectSUn(Lattice<iVector<iScalar<iMatrix<vComplex_t, N> >,Nd> > &U)
{
GaugeGroup<N,GroupName::SU>::ProjectOnSpecialGroup(U);
}
template <int N,class vComplex_t>
static void ProjectSpn(Lattice<iScalar<iScalar<iMatrix<vComplex_t, N> > > > &Umu)
{
GaugeGroup<N,GroupName::Sp>::ProjectOnSpecialGroup(Umu);
}
template <int N,class vComplex_t>
static void ProjectSpn(Lattice<iVector<iScalar<iMatrix<vComplex_t, N> >,Nd> > &U)
{
GaugeGroup<N,GroupName::Sp>::ProjectOnSpecialGroup(U);
}
// Explicit specialisation for SU(3).
static void ProjectSU3(Lattice<iScalar<iScalar<iMatrix<vComplexD, 3> > > > &Umu)
{
GridBase *grid = Umu.Grid();
const int x = 0;
const int y = 1;
const int z = 2;
// Reunitarise
Umu = ProjectOnGroup(Umu);
autoView(Umu_v, Umu, CpuWrite);
thread_for(ss, grid->oSites(), {
auto cm = Umu_v[ss];
cm()()(2, x) = adj(cm()()(0, y) * cm()()(1, z) -
cm()()(0, z) * cm()()(1, y)); // x= yz-zy
cm()()(2, y) = adj(cm()()(0, z) * cm()()(1, x) -
cm()()(0, x) * cm()()(1, z)); // y= zx-xz
cm()()(2, z) = adj(cm()()(0, x) * cm()()(1, y) -
cm()()(0, y) * cm()()(1, x)); // z= xy-yx
Umu_v[ss] = cm;
});
}
static void ProjectSU3(Lattice<iVector<iScalar<iMatrix<vComplexD, 3> >, Nd> > &U)
{
GridBase *grid = U.Grid();
// Reunitarise
for (int mu = 0; mu < Nd; mu++) {
auto Umu = PeekIndex<LorentzIndex>(U, mu);
Umu = ProjectOnGroup(Umu);
ProjectSU3(Umu);
PokeIndex<LorentzIndex>(U, Umu, mu);
}
}
NAMESPACE_END(Grid);
#endif

View File

@@ -0,0 +1,371 @@
////////////////////////////////////////////////////////////////////////
//
// * Two index representation generators
//
// * Normalisation for the fundamental generators:
// trace ta tb = 1/2 delta_ab = T_F delta_ab
// T_F = 1/2 for SU(N) groups
//
//
// base for NxN two index (anti-symmetric) matrices
// normalized to 1 (d_ij is the kroenecker delta)
//
// (e^(ij)_{kl} = 1 / sqrt(2) (d_ik d_jl +/- d_jk d_il)
//
// Then the generators are written as
//
// (iT_a)^(ij)(lk) = i * ( tr[e^(ij)^dag e^(lk) T^trasp_a] +
// tr[e^(lk)e^(ij)^dag T_a] ) //
//
//
////////////////////////////////////////////////////////////////////////
// Authors: David Preti, Guido Cossu
#ifndef QCD_UTIL_GAUGEGROUPTWOINDEX_H
#define QCD_UTIL_GAUGEGROUPTWOINDEX_H
NAMESPACE_BEGIN(Grid);
enum TwoIndexSymmetry { Symmetric = 1, AntiSymmetric = -1 };
constexpr inline Real delta(int a, int b) { return (a == b) ? 1.0 : 0.0; }
namespace detail {
template <class cplx, int nc, TwoIndexSymmetry S>
struct baseOffDiagonalSpHelper;
template <class cplx, int nc>
struct baseOffDiagonalSpHelper<cplx, nc, AntiSymmetric> {
static const int ngroup = nc / 2;
static void baseOffDiagonalSp(int i, int j, iScalar<iScalar<iMatrix<cplx, nc> > > &eij) {
eij = Zero();
RealD tmp;
if ((i == ngroup + j) && (1 <= j) && (j < ngroup)) {
for (int k = 0; k < j+1; k++) {
if (k < j) {
tmp = 1 / sqrt(j * (j + 1));
eij()()(k, k + ngroup) = tmp;
eij()()(k + ngroup, k) = -tmp;
}
if (k == j) {
tmp = -j / sqrt(j * (j + 1));
eij()()(k, k + ngroup) = tmp;
eij()()(k + ngroup, k) = -tmp;
}
}
}
else if (i != ngroup + j) {
for (int k = 0; k < nc; k++)
for (int l = 0; l < nc; l++) {
eij()()(l, k) =
delta(i, k) * delta(j, l) - delta(j, k) * delta(i, l);
}
}
RealD nrm = 1. / std::sqrt(2.0);
eij = eij * nrm;
}
};
template <class cplx, int nc>
struct baseOffDiagonalSpHelper<cplx, nc, Symmetric> {
static void baseOffDiagonalSp(int i, int j, iScalar<iScalar<iMatrix<cplx, nc> > > &eij) {
eij = Zero();
for (int k = 0; k < nc; k++)
for (int l = 0; l < nc; l++)
eij()()(l, k) =
delta(i, k) * delta(j, l) + delta(j, k) * delta(i, l);
RealD nrm = 1. / std::sqrt(2.0);
eij = eij * nrm;
}
};
} // closing detail namespace
template <int ncolour, TwoIndexSymmetry S, class group_name>
class GaugeGroupTwoIndex : public GaugeGroup<ncolour, group_name> {
public:
// The chosen convention is that we are taking ncolour to be N in SU<N> but 2N
// in Sp(2N). ngroup is equal to N for SU but 2N/2 = N for Sp(2N).
static_assert(std::is_same<group_name, GroupName::SU>::value or
std::is_same<group_name, GroupName::Sp>::value,
"ngroup is only implemented for SU and Sp currently.");
static const int ngroup =
std::is_same<group_name, GroupName::SU>::value ? ncolour : ncolour / 2;
static const int Dimension =
(ncolour * (ncolour + S) / 2) + (std::is_same<group_name, GroupName::Sp>::value ? (S - 1) / 2 : 0);
static const int DimensionAS =
(ncolour * (ncolour - 1) / 2) + (std::is_same<group_name, GroupName::Sp>::value ? (- 1) : 0);
static const int DimensionS =
ncolour * (ncolour + 1) / 2;
static const int NumGenerators =
GaugeGroup<ncolour, group_name>::AlgebraDimension;
template <typename vtype>
using iGroupTwoIndexMatrix = iScalar<iScalar<iMatrix<vtype, Dimension> > >;
typedef iGroupTwoIndexMatrix<Complex> TIMatrix;
typedef iGroupTwoIndexMatrix<ComplexF> TIMatrixF;
typedef iGroupTwoIndexMatrix<ComplexD> TIMatrixD;
typedef iGroupTwoIndexMatrix<vComplex> vTIMatrix;
typedef iGroupTwoIndexMatrix<vComplexF> vTIMatrixF;
typedef iGroupTwoIndexMatrix<vComplexD> vTIMatrixD;
typedef Lattice<vTIMatrix> LatticeTwoIndexMatrix;
typedef Lattice<vTIMatrixF> LatticeTwoIndexMatrixF;
typedef Lattice<vTIMatrixD> LatticeTwoIndexMatrixD;
typedef Lattice<iVector<iScalar<iMatrix<vComplex, Dimension> >, Nd> >
LatticeTwoIndexField;
typedef Lattice<iVector<iScalar<iMatrix<vComplexF, Dimension> >, Nd> >
LatticeTwoIndexFieldF;
typedef Lattice<iVector<iScalar<iMatrix<vComplexD, Dimension> >, Nd> >
LatticeTwoIndexFieldD;
template <typename vtype>
using iGroupMatrix = iScalar<iScalar<iMatrix<vtype, ncolour> > >;
typedef iGroupMatrix<Complex> Matrix;
typedef iGroupMatrix<ComplexF> MatrixF;
typedef iGroupMatrix<ComplexD> MatrixD;
private:
template <class cplx>
static void baseDiagonal(int Index, iGroupMatrix<cplx> &eij) {
eij = Zero();
eij()()(Index - ncolour * (ncolour - 1) / 2,
Index - ncolour * (ncolour - 1) / 2) = 1.0;
}
template <class cplx>
static void baseOffDiagonal(int i, int j, iGroupMatrix<cplx> &eij, GroupName::SU) {
eij = Zero();
for (int k = 0; k < ncolour; k++)
for (int l = 0; l < ncolour; l++)
eij()()(l, k) =
delta(i, k) * delta(j, l) + S * delta(j, k) * delta(i, l);
RealD nrm = 1. / std::sqrt(2.0);
eij = eij * nrm;
}
template <class cplx>
static void baseOffDiagonal(int i, int j, iGroupMatrix<cplx> &eij, GroupName::Sp) {
detail::baseOffDiagonalSpHelper<cplx, ncolour, S>::baseOffDiagonalSp(i, j, eij);
}
public:
template <class cplx>
static void base(int Index, iGroupMatrix<cplx> &eij) {
// returns (e)^(ij)_{kl} necessary for change of base U_F -> U_R
assert(Index < Dimension);
eij = Zero();
// for the linearisation of the 2 indexes
static int a[ncolour * (ncolour - 1) / 2][2]; // store the a <-> i,j
static bool filled = false;
if (!filled) {
int counter = 0;
for (int i = 1; i < ncolour; i++) {
for (int j = 0; j < i; j++) {
if (std::is_same<group_name, GroupName::Sp>::value)
{
if (j==0 && i==ngroup+j && S==-1) {
//std::cout << "skipping" << std::endl; // for Sp2n this vanishes identically.
j = j+1;
}
}
a[counter][0] = i;
a[counter][1] = j;
counter++;
}
}
filled = true;
}
if (Index < ncolour*ncolour - DimensionS)
{
baseOffDiagonal(a[Index][0], a[Index][1], eij, group_name());
} else {
baseDiagonal(Index, eij);
}
}
static void printBase(void) {
for (int gen = 0; gen < Dimension; gen++) {
Matrix tmp;
base(gen, tmp);
std::cout << GridLogMessage << "Nc = " << ncolour << " t_" << gen
<< std::endl;
std::cout << GridLogMessage << tmp << std::endl;
}
}
template <class cplx>
static void generator(int Index, iGroupTwoIndexMatrix<cplx> &i2indTa) {
Vector<iGroupMatrix<cplx> > ta(NumGenerators);
Vector<iGroupMatrix<cplx> > eij(Dimension);
iGroupMatrix<cplx> tmp;
for (int a = 0; a < NumGenerators; a++)
GaugeGroup<ncolour, group_name>::generator(a, ta[a]);
for (int a = 0; a < Dimension; a++) base(a, eij[a]);
for (int a = 0; a < Dimension; a++) {
tmp = transpose(eij[a]*ta[Index]) + transpose(eij[a]) * ta[Index];
for (int b = 0; b < Dimension; b++) {
Complex iTr = TensorRemove(timesI(trace(tmp * eij[b])));
i2indTa()()(a, b) = iTr;
}
}
}
static void printGenerators(void) {
for (int gen = 0; gen < NumGenerators; gen++) {
TIMatrix i2indTa;
generator(gen, i2indTa);
std::cout << GridLogMessage << "Nc = " << ncolour << " t_" << gen
<< std::endl;
std::cout << GridLogMessage << i2indTa << std::endl;
}
}
static void testGenerators(void) {
TIMatrix i2indTa, i2indTb;
std::cout << GridLogMessage << "2IndexRep - Checking if traceless"
<< std::endl;
for (int a = 0; a < NumGenerators; a++) {
generator(a, i2indTa);
std::cout << GridLogMessage << a << std::endl;
assert(norm2(trace(i2indTa)) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage << "2IndexRep - Checking if antihermitean"
<< std::endl;
for (int a = 0; a < NumGenerators; a++) {
generator(a, i2indTa);
std::cout << GridLogMessage << a << std::endl;
assert(norm2(adj(i2indTa) + i2indTa) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage
<< "2IndexRep - Checking Tr[Ta*Tb]=delta(a,b)*(N +- 2)/2"
<< std::endl;
for (int a = 0; a < NumGenerators; a++) {
for (int b = 0; b < NumGenerators; b++) {
generator(a, i2indTa);
generator(b, i2indTb);
// generator returns iTa, so we need a minus sign here
Complex Tr = -TensorRemove(trace(i2indTa * i2indTb));
std::cout << GridLogMessage << "a=" << a << "b=" << b << "Tr=" << Tr
<< std::endl;
if (a == b) {
assert(real(Tr) - ((ncolour + S * 2) * 0.5) < 1e-8);
} else {
assert(real(Tr) < 1e-8);
}
assert(imag(Tr) < 1e-8);
}
}
std::cout << GridLogMessage << std::endl;
}
static void TwoIndexLieAlgebraMatrix(
const typename GaugeGroup<ncolour, group_name>::LatticeAlgebraVector &h,
LatticeTwoIndexMatrix &out, Real scale = 1.0) {
conformable(h, out);
GridBase *grid = out.Grid();
LatticeTwoIndexMatrix la(grid);
TIMatrix i2indTa;
out = Zero();
for (int a = 0; a < NumGenerators; a++) {
generator(a, i2indTa);
la = peekColour(h, a) * i2indTa;
out += la;
}
out *= scale;
}
// Projects the algebra components
// of a lattice matrix ( of dimension ncol*ncol -1 )
static void projectOnAlgebra(
typename GaugeGroup<ncolour, group_name>::LatticeAlgebraVector &h_out,
const LatticeTwoIndexMatrix &in, Real scale = 1.0) {
conformable(h_out, in);
h_out = Zero();
TIMatrix i2indTa;
Real coefficient = -2.0 / (ncolour + 2 * S) * scale;
// 2/(Nc +/- 2) for the normalization of the trace in the two index rep
for (int a = 0; a < NumGenerators; a++) {
generator(a, i2indTa);
pokeColour(h_out, real(trace(i2indTa * in)) * coefficient, a);
}
}
// a projector that keeps the generators stored to avoid the overhead of
// recomputing them
static void projector(
typename GaugeGroup<ncolour, group_name>::LatticeAlgebraVector &h_out,
const LatticeTwoIndexMatrix &in, Real scale = 1.0) {
conformable(h_out, in);
// to store the generators
static std::vector<TIMatrix> i2indTa(NumGenerators);
h_out = Zero();
static bool precalculated = false;
if (!precalculated) {
precalculated = true;
for (int a = 0; a < NumGenerators; a++) generator(a, i2indTa[a]);
}
Real coefficient =
-2.0 / (ncolour + 2 * S) * scale; // 2/(Nc +/- 2) for the normalization
// of the trace in the two index rep
for (int a = 0; a < NumGenerators; a++) {
auto tmp = real(trace(i2indTa[a] * in)) * coefficient;
pokeColour(h_out, tmp, a);
}
}
};
template <int ncolour, TwoIndexSymmetry S>
using SU_TwoIndex = GaugeGroupTwoIndex<ncolour, S, GroupName::SU>;
// Some useful type names
typedef SU_TwoIndex<Nc, Symmetric> TwoIndexSymmMatrices;
typedef SU_TwoIndex<Nc, AntiSymmetric> TwoIndexAntiSymmMatrices;
typedef SU_TwoIndex<2, Symmetric> SU2TwoIndexSymm;
typedef SU_TwoIndex<3, Symmetric> SU3TwoIndexSymm;
typedef SU_TwoIndex<4, Symmetric> SU4TwoIndexSymm;
typedef SU_TwoIndex<5, Symmetric> SU5TwoIndexSymm;
typedef SU_TwoIndex<2, AntiSymmetric> SU2TwoIndexAntiSymm;
typedef SU_TwoIndex<3, AntiSymmetric> SU3TwoIndexAntiSymm;
typedef SU_TwoIndex<4, AntiSymmetric> SU4TwoIndexAntiSymm;
typedef SU_TwoIndex<5, AntiSymmetric> SU5TwoIndexAntiSymm;
template <int ncolour, TwoIndexSymmetry S>
using Sp_TwoIndex = GaugeGroupTwoIndex<ncolour, S, GroupName::Sp>;
typedef Sp_TwoIndex<Nc, Symmetric> SpTwoIndexSymmMatrices;
typedef Sp_TwoIndex<Nc, AntiSymmetric> SpTwoIndexAntiSymmMatrices;
typedef Sp_TwoIndex<2, Symmetric> Sp2TwoIndexSymm;
typedef Sp_TwoIndex<4, Symmetric> Sp4TwoIndexSymm;
typedef Sp_TwoIndex<4, AntiSymmetric> Sp4TwoIndexAntiSymm;
NAMESPACE_END(Grid);
#endif

View File

@@ -1,892 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/utils/SUn.h
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef QCD_UTIL_SUN_H
#define QCD_UTIL_SUN_H
NAMESPACE_BEGIN(Grid);
template <int ncolour>
class SU {
public:
static const int Dimension = ncolour;
static const int AdjointDimension = ncolour * ncolour - 1;
static int su2subgroups(void) { return (ncolour * (ncolour - 1)) / 2; }
template <typename vtype>
using iSUnMatrix = iScalar<iScalar<iMatrix<vtype, ncolour> > >;
template <typename vtype>
using iSU2Matrix = iScalar<iScalar<iMatrix<vtype, 2> > >;
template <typename vtype>
using iSUnAlgebraVector =
iScalar<iScalar<iVector<vtype, AdjointDimension> > >;
//////////////////////////////////////////////////////////////////////////////////////////////////
// Types can be accessed as SU<2>::Matrix , SU<2>::vSUnMatrix,
// SU<2>::LatticeMatrix etc...
//////////////////////////////////////////////////////////////////////////////////////////////////
typedef iSUnMatrix<Complex> Matrix;
typedef iSUnMatrix<ComplexF> MatrixF;
typedef iSUnMatrix<ComplexD> MatrixD;
typedef iSUnMatrix<vComplex> vMatrix;
typedef iSUnMatrix<vComplexF> vMatrixF;
typedef iSUnMatrix<vComplexD> vMatrixD;
// For the projectors to the algebra
// these should be real...
// keeping complex for consistency with the SIMD vector types
typedef iSUnAlgebraVector<Complex> AlgebraVector;
typedef iSUnAlgebraVector<ComplexF> AlgebraVectorF;
typedef iSUnAlgebraVector<ComplexD> AlgebraVectorD;
typedef iSUnAlgebraVector<vComplex> vAlgebraVector;
typedef iSUnAlgebraVector<vComplexF> vAlgebraVectorF;
typedef iSUnAlgebraVector<vComplexD> vAlgebraVectorD;
typedef Lattice<vMatrix> LatticeMatrix;
typedef Lattice<vMatrixF> LatticeMatrixF;
typedef Lattice<vMatrixD> LatticeMatrixD;
typedef Lattice<vAlgebraVector> LatticeAlgebraVector;
typedef Lattice<vAlgebraVectorF> LatticeAlgebraVectorF;
typedef Lattice<vAlgebraVectorD> LatticeAlgebraVectorD;
typedef iSU2Matrix<Complex> SU2Matrix;
typedef iSU2Matrix<ComplexF> SU2MatrixF;
typedef iSU2Matrix<ComplexD> SU2MatrixD;
typedef iSU2Matrix<vComplex> vSU2Matrix;
typedef iSU2Matrix<vComplexF> vSU2MatrixF;
typedef iSU2Matrix<vComplexD> vSU2MatrixD;
typedef Lattice<vSU2Matrix> LatticeSU2Matrix;
typedef Lattice<vSU2MatrixF> LatticeSU2MatrixF;
typedef Lattice<vSU2MatrixD> LatticeSU2MatrixD;
////////////////////////////////////////////////////////////////////////
// There are N^2-1 generators for SU(N).
//
// We take a traceless hermitian generator basis as follows
//
// * Normalisation: trace ta tb = 1/2 delta_ab = T_F delta_ab
// T_F = 1/2 for SU(N) groups
//
// * Off diagonal
// - pairs of rows i1,i2 behaving like pauli matrices signma_x, sigma_y
//
// - there are (Nc-1-i1) slots for i2 on each row [ x 0 x ]
// direct count off each row
//
// - Sum of all pairs is Nc(Nc-1)/2: proof arithmetic series
//
// (Nc-1) + (Nc-2)+... 1 ==> Nc*(Nc-1)/2
// 1+ 2+ + + Nc-1
//
// - There are 2 x Nc (Nc-1)/ 2 of these = Nc^2 - Nc
//
// - We enumerate the row-col pairs.
// - for each row col pair there is a (sigma_x) and a (sigma_y) like
// generator
//
//
// t^a_ij = { in 0.. Nc(Nc-1)/2 -1} => 1/2(delta_{i,i1} delta_{j,i2} +
// delta_{i,i1} delta_{j,i2})
// t^a_ij = { in Nc(Nc-1)/2 ... Nc(Nc-1) - 1} => i/2( delta_{i,i1}
// delta_{j,i2} - i delta_{i,i1} delta_{j,i2})
//
// * Diagonal; must be traceless and normalised
// - Sequence is
// N (1,-1,0,0...)
// N (1, 1,-2,0...)
// N (1, 1, 1,-3,0...)
// N (1, 1, 1, 1,-4,0...)
//
// where 1/2 = N^2 (1+.. m^2)etc.... for the m-th diagonal generator
// NB this gives the famous SU3 result for su2 index 8
//
// N= sqrt(1/2 . 1/6 ) = 1/2 . 1/sqrt(3)
//
// ( 1 )
// ( 1 ) / sqrt(3) /2 = 1/2 lambda_8
// ( -2)
//
////////////////////////////////////////////////////////////////////////
template <class cplx>
static void generator(int lieIndex, iSUnMatrix<cplx> &ta) {
// map lie index to which type of generator
int diagIndex;
int su2Index;
int sigxy;
int NNm1 = ncolour * (ncolour - 1);
if (lieIndex >= NNm1) {
diagIndex = lieIndex - NNm1;
generatorDiagonal(diagIndex, ta);
return;
}
sigxy = lieIndex & 0x1; // even or odd
su2Index = lieIndex >> 1;
if (sigxy)
generatorSigmaY(su2Index, ta);
else
generatorSigmaX(su2Index, ta);
}
template <class cplx>
static void generatorSigmaY(int su2Index, iSUnMatrix<cplx> &ta) {
ta = Zero();
int i1, i2;
su2SubGroupIndex(i1, i2, su2Index);
ta()()(i1, i2) = 1.0;
ta()()(i2, i1) = 1.0;
ta = ta * 0.5;
}
template <class cplx>
static void generatorSigmaX(int su2Index, iSUnMatrix<cplx> &ta) {
ta = Zero();
cplx i(0.0, 1.0);
int i1, i2;
su2SubGroupIndex(i1, i2, su2Index);
ta()()(i1, i2) = i;
ta()()(i2, i1) = -i;
ta = ta * 0.5;
}
template <class cplx>
static void generatorDiagonal(int diagIndex, iSUnMatrix<cplx> &ta) {
// diag ({1, 1, ..., 1}(k-times), -k, 0, 0, ...)
ta = Zero();
int k = diagIndex + 1; // diagIndex starts from 0
for (int i = 0; i <= diagIndex; i++) { // k iterations
ta()()(i, i) = 1.0;
}
ta()()(k, k) = -k; // indexing starts from 0
RealD nrm = 1.0 / std::sqrt(2.0 * k * (k + 1));
ta = ta * nrm;
}
////////////////////////////////////////////////////////////////////////
// Map a su2 subgroup number to the pair of rows that are non zero
////////////////////////////////////////////////////////////////////////
static void su2SubGroupIndex(int &i1, int &i2, int su2_index) {
assert((su2_index >= 0) && (su2_index < (ncolour * (ncolour - 1)) / 2));
int spare = su2_index;
for (i1 = 0; spare >= (ncolour - 1 - i1); i1++) {
spare = spare - (ncolour - 1 - i1); // remove the Nc-1-i1 terms
}
i2 = i1 + 1 + spare;
}
//////////////////////////////////////////////////////////////////////////////////////////
// Pull out a subgroup and project on to real coeffs x pauli basis
//////////////////////////////////////////////////////////////////////////////////////////
template <class vcplx>
static void su2Extract(Lattice<iSinglet<vcplx> > &Determinant,
Lattice<iSU2Matrix<vcplx> > &subgroup,
const Lattice<iSUnMatrix<vcplx> > &source,
int su2_index) {
GridBase *grid(source.Grid());
conformable(subgroup, source);
conformable(subgroup, Determinant);
int i0, i1;
su2SubGroupIndex(i0, i1, su2_index);
autoView( subgroup_v , subgroup,AcceleratorWrite);
autoView( source_v , source,AcceleratorRead);
autoView( Determinant_v , Determinant,AcceleratorWrite);
accelerator_for(ss, grid->oSites(), 1, {
subgroup_v[ss]()()(0, 0) = source_v[ss]()()(i0, i0);
subgroup_v[ss]()()(0, 1) = source_v[ss]()()(i0, i1);
subgroup_v[ss]()()(1, 0) = source_v[ss]()()(i1, i0);
subgroup_v[ss]()()(1, 1) = source_v[ss]()()(i1, i1);
iSU2Matrix<vcplx> Sigma = subgroup_v[ss];
Sigma = Sigma - adj(Sigma) + trace(adj(Sigma));
subgroup_v[ss] = Sigma;
// this should be purely real
Determinant_v[ss] =
Sigma()()(0, 0) * Sigma()()(1, 1) - Sigma()()(0, 1) * Sigma()()(1, 0);
});
}
//////////////////////////////////////////////////////////////////////////////////////////
// Set matrix to one and insert a pauli subgroup
//////////////////////////////////////////////////////////////////////////////////////////
template <class vcplx>
static void su2Insert(const Lattice<iSU2Matrix<vcplx> > &subgroup,
Lattice<iSUnMatrix<vcplx> > &dest, int su2_index) {
GridBase *grid(dest.Grid());
conformable(subgroup, dest);
int i0, i1;
su2SubGroupIndex(i0, i1, su2_index);
dest = 1.0; // start out with identity
autoView( dest_v , dest, AcceleratorWrite);
autoView( subgroup_v, subgroup, AcceleratorRead);
accelerator_for(ss, grid->oSites(),1,
{
dest_v[ss]()()(i0, i0) = subgroup_v[ss]()()(0, 0);
dest_v[ss]()()(i0, i1) = subgroup_v[ss]()()(0, 1);
dest_v[ss]()()(i1, i0) = subgroup_v[ss]()()(1, 0);
dest_v[ss]()()(i1, i1) = subgroup_v[ss]()()(1, 1);
});
}
///////////////////////////////////////////////
// Generate e^{ Re Tr Staple Link} dlink
//
// *** Note Staple should be appropriate linear compbination between all
// staples.
// *** If already by beta pass coefficient 1.0.
// *** This routine applies the additional 1/Nc factor that comes after trace
// in action.
//
///////////////////////////////////////////////
static void SubGroupHeatBath(GridSerialRNG &sRNG, GridParallelRNG &pRNG,
RealD beta, // coeff multiplying staple in action (with no 1/Nc)
LatticeMatrix &link,
const LatticeMatrix &barestaple, // multiplied by action coeffs so th
int su2_subgroup, int nheatbath, LatticeInteger &wheremask)
{
GridBase *grid = link.Grid();
const RealD twopi = 2.0 * M_PI;
LatticeMatrix staple(grid);
staple = barestaple * (beta / ncolour);
LatticeMatrix V(grid);
V = link * staple;
// Subgroup manipulation in the lie algebra space
LatticeSU2Matrix u(grid); // Kennedy pendleton "u" real projected normalised Sigma
LatticeSU2Matrix uinv(grid);
LatticeSU2Matrix ua(grid); // a in pauli form
LatticeSU2Matrix b(grid); // rotated matrix after hb
// Some handy constant fields
LatticeComplex ones(grid);
ones = 1.0;
LatticeComplex zeros(grid);
zeros = Zero();
LatticeReal rones(grid);
rones = 1.0;
LatticeReal rzeros(grid);
rzeros = Zero();
LatticeComplex udet(grid); // determinant of real(staple)
LatticeInteger mask_true(grid);
mask_true = 1;
LatticeInteger mask_false(grid);
mask_false = 0;
/*
PLB 156 P393 (1985) (Kennedy and Pendleton)
Note: absorb "beta" into the def of sigma compared to KP paper; staple
passed to this routine has "beta" already multiplied in
Action linear in links h and of form:
beta S = beta Sum_p (1 - 1/Nc Re Tr Plaq )
Writing Sigma = 1/Nc (beta Sigma') where sum over staples is "Sigma' "
beta S = const - beta/Nc Re Tr h Sigma'
= const - Re Tr h Sigma
Decompose h and Sigma into (1, sigma_j) ; h_i real, h^2=1, Sigma_i complex
arbitrary.
Tr h Sigma = h_i Sigma_j Tr (sigma_i sigma_j) = h_i Sigma_j 2 delta_ij
Re Tr h Sigma = 2 h_j Re Sigma_j
Normalised re Sigma_j = xi u_j
With u_j a unit vector and U can be in SU(2);
Re Tr h Sigma = 2 h_j Re Sigma_j = 2 xi (h.u)
4xi^2 = Det [ Sig - Sig^dag + 1 Tr Sigdag]
u = 1/2xi [ Sig - Sig^dag + 1 Tr Sigdag]
xi = sqrt(Det)/2;
Write a= u h in SU(2); a has pauli decomp a_j;
Note: Product b' xi is unvariant because scaling Sigma leaves
normalised vector "u" fixed; Can rescale Sigma so b' = 1.
*/
////////////////////////////////////////////////////////
// Real part of Pauli decomposition
// Note a subgroup can project to zero in cold start
////////////////////////////////////////////////////////
su2Extract(udet, u, V, su2_subgroup);
//////////////////////////////////////////////////////
// Normalising this vector if possible; else identity
//////////////////////////////////////////////////////
LatticeComplex xi(grid);
LatticeSU2Matrix lident(grid);
SU2Matrix ident = Complex(1.0);
SU2Matrix pauli1;
SU<2>::generator(0, pauli1);
SU2Matrix pauli2;
SU<2>::generator(1, pauli2);
SU2Matrix pauli3;
SU<2>::generator(2, pauli3);
pauli1 = timesI(pauli1) * 2.0;
pauli2 = timesI(pauli2) * 2.0;
pauli3 = timesI(pauli3) * 2.0;
LatticeComplex cone(grid);
LatticeReal adet(grid);
adet = abs(toReal(udet));
lident = Complex(1.0);
cone = Complex(1.0);
Real machine_epsilon = 1.0e-7;
u = where(adet > machine_epsilon, u, lident);
udet = where(adet > machine_epsilon, udet, cone);
xi = 0.5 * sqrt(udet); // 4xi^2 = Det [ Sig - Sig^dag + 1 Tr Sigdag]
u = 0.5 * u *
pow(xi, -1.0); // u = 1/2xi [ Sig - Sig^dag + 1 Tr Sigdag]
// Debug test for sanity
uinv = adj(u);
b = u * uinv - 1.0;
assert(norm2(b) < 1.0e-4);
/*
Measure: Haar measure dh has d^4a delta(1-|a^2|)
In polars:
da = da0 r^2 sin theta dr dtheta dphi delta( 1 - r^2 -a0^2)
= da0 r^2 sin theta dr dtheta dphi delta( (sqrt(1-a0^) - r)(sqrt(1-a0^) +
r) )
= da0 r/2 sin theta dr dtheta dphi delta( (sqrt(1-a0^) - r) )
Action factor Q(h) dh = e^-S[h] dh = e^{ xi Tr uh} dh // beta enters
through xi
= e^{2 xi (h.u)} dh
= e^{2 xi h0u0}.e^{2 xi h1u1}.e^{2 xi
h2u2}.e^{2 xi h3u3} dh
Therefore for each site, take xi for that site
i) generate |a0|<1 with dist
(1-a0^2)^0.5 e^{2 xi a0 } da0
Take alpha = 2 xi = 2 xi [ recall 2 beta/Nc unmod staple norm]; hence 2.0/Nc
factor in Chroma ]
A. Generate two uniformly distributed pseudo-random numbers R and R', R'',
R''' in the unit interval;
B. Set X = -(ln R)/alpha, X' =-(ln R')/alpha;
C. Set C = cos^2(2pi R"), with R" another uniform random number in [0,1] ;
D. Set A = XC;
E. Let d = X'+A;
F. If R'''^2 :> 1 - 0.5 d, go back to A;
G. Set a0 = 1 - d;
Note that in step D setting B ~ X - A and using B in place of A in step E will
generate a second independent a 0 value.
*/
/////////////////////////////////////////////////////////
// count the number of sites by picking "1"'s out of hat
/////////////////////////////////////////////////////////
Integer hit = 0;
LatticeReal rtmp(grid);
rtmp = where(wheremask, rones, rzeros);
RealD numSites = sum(rtmp);
RealD numAccepted;
LatticeInteger Accepted(grid);
Accepted = Zero();
LatticeInteger newlyAccepted(grid);
std::vector<LatticeReal> xr(4, grid);
std::vector<LatticeReal> a(4, grid);
LatticeReal d(grid);
d = Zero();
LatticeReal alpha(grid);
// std::cout<<GridLogMessage<<"xi "<<xi <<std::endl;
xi = 2.0 *xi;
alpha = toReal(xi);
do {
// A. Generate two uniformly distributed pseudo-random numbers R and R',
// R'', R''' in the unit interval;
random(pRNG, xr[0]);
random(pRNG, xr[1]);
random(pRNG, xr[2]);
random(pRNG, xr[3]);
// B. Set X = - ln R/alpha, X' = -ln R'/alpha
xr[1] = -log(xr[1]) / alpha;
xr[2] = -log(xr[2]) / alpha;
// C. Set C = cos^2(2piR'')
xr[3] = cos(xr[3] * twopi);
xr[3] = xr[3] * xr[3];
LatticeReal xrsq(grid);
// D. Set A = XC;
// E. Let d = X'+A;
xrsq = xr[2] + xr[1] * xr[3];
d = where(Accepted, d, xr[2] + xr[1] * xr[3]);
// F. If R'''^2 :> 1 - 0.5 d, go back to A;
LatticeReal thresh(grid);
thresh = 1.0 - d * 0.5;
xrsq = xr[0] * xr[0];
LatticeInteger ione(grid);
ione = 1;
LatticeInteger izero(grid);
izero = Zero();
newlyAccepted = where(xrsq < thresh, ione, izero);
Accepted = where(newlyAccepted, newlyAccepted, Accepted);
Accepted = where(wheremask, Accepted, izero);
// FIXME need an iSum for integer to avoid overload on return type??
rtmp = where(Accepted, rones, rzeros);
numAccepted = sum(rtmp);
hit++;
} while ((numAccepted < numSites) && (hit < nheatbath));
// G. Set a0 = 1 - d;
a[0] = Zero();
a[0] = where(wheremask, 1.0 - d, a[0]);
//////////////////////////////////////////
// ii) generate a_i uniform on two sphere radius (1-a0^2)^0.5
//////////////////////////////////////////
LatticeReal a123mag(grid);
a123mag = sqrt(abs(1.0 - a[0] * a[0]));
LatticeReal cos_theta(grid);
LatticeReal sin_theta(grid);
LatticeReal phi(grid);
random(pRNG, phi);
phi = phi * twopi; // uniform in [0,2pi]
random(pRNG, cos_theta);
cos_theta = (cos_theta * 2.0) - 1.0; // uniform in [-1,1]
sin_theta = sqrt(abs(1.0 - cos_theta * cos_theta));
a[1] = a123mag * sin_theta * cos(phi);
a[2] = a123mag * sin_theta * sin(phi);
a[3] = a123mag * cos_theta;
ua = toComplex(a[0]) * ident + toComplex(a[1]) * pauli1 +
toComplex(a[2]) * pauli2 + toComplex(a[3]) * pauli3;
b = 1.0;
b = where(wheremask, uinv * ua, b);
su2Insert(b, V, su2_subgroup);
// mask the assignment back based on Accptance
link = where(Accepted, V * link, link);
//////////////////////////////
// Debug Checks
// SU2 check
LatticeSU2Matrix check(grid); // rotated matrix after hb
u = Zero();
check = ua * adj(ua) - 1.0;
check = where(Accepted, check, u);
assert(norm2(check) < 1.0e-4);
check = b * adj(b) - 1.0;
check = where(Accepted, check, u);
assert(norm2(check) < 1.0e-4);
LatticeMatrix Vcheck(grid);
Vcheck = Zero();
Vcheck = where(Accepted, V * adj(V) - 1.0, Vcheck);
// std::cout<<GridLogMessage << "SU3 check " <<norm2(Vcheck)<<std::endl;
assert(norm2(Vcheck) < 1.0e-4);
// Verify the link stays in SU(3)
// std::cout<<GridLogMessage <<"Checking the modified link"<<std::endl;
Vcheck = link * adj(link) - 1.0;
assert(norm2(Vcheck) < 1.0e-4);
/////////////////////////////////
}
static void printGenerators(void) {
for (int gen = 0; gen < AdjointDimension; gen++) {
Matrix ta;
generator(gen, ta);
std::cout << GridLogMessage << "Nc = " << ncolour << " t_" << gen
<< std::endl;
std::cout << GridLogMessage << ta << std::endl;
}
}
static void testGenerators(void) {
Matrix ta;
Matrix tb;
std::cout << GridLogMessage
<< "Fundamental - Checking trace ta tb is 0.5 delta_ab"
<< std::endl;
for (int a = 0; a < AdjointDimension; a++) {
for (int b = 0; b < AdjointDimension; b++) {
generator(a, ta);
generator(b, tb);
Complex tr = TensorRemove(trace(ta * tb));
std::cout << GridLogMessage << "(" << a << "," << b << ") = " << tr
<< std::endl;
if (a == b) assert(abs(tr - Complex(0.5)) < 1.0e-6);
if (a != b) assert(abs(tr) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
}
std::cout << GridLogMessage << "Fundamental - Checking if hermitian"
<< std::endl;
for (int a = 0; a < AdjointDimension; a++) {
generator(a, ta);
std::cout << GridLogMessage << a << std::endl;
assert(norm2(ta - adj(ta)) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage << "Fundamental - Checking if traceless"
<< std::endl;
for (int a = 0; a < AdjointDimension; a++) {
generator(a, ta);
Complex tr = TensorRemove(trace(ta));
std::cout << GridLogMessage << a << " " << std::endl;
assert(abs(tr) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
}
// reunitarise??
template <typename LatticeMatrixType>
static void LieRandomize(GridParallelRNG &pRNG, LatticeMatrixType &out, double scale = 1.0)
{
GridBase *grid = out.Grid();
typedef typename LatticeMatrixType::vector_type vector_type;
typedef iSinglet<vector_type> vTComplexType;
typedef Lattice<vTComplexType> LatticeComplexType;
typedef typename GridTypeMapper<typename LatticeMatrixType::vector_object>::scalar_object MatrixType;
LatticeComplexType ca(grid);
LatticeMatrixType lie(grid);
LatticeMatrixType la(grid);
ComplexD ci(0.0, scale);
// ComplexD cone(1.0, 0.0);
MatrixType ta;
lie = Zero();
for (int a = 0; a < AdjointDimension; a++) {
random(pRNG, ca);
ca = (ca + conjugate(ca)) * 0.5;
ca = ca - 0.5;
generator(a, ta);
la = ci * ca * ta;
lie = lie + la; // e^{i la ta}
}
taExp(lie, out);
}
static void GaussianFundamentalLieAlgebraMatrix(GridParallelRNG &pRNG,
LatticeMatrix &out,
Real scale = 1.0) {
GridBase *grid = out.Grid();
LatticeReal ca(grid);
LatticeMatrix la(grid);
Complex ci(0.0, scale);
Matrix ta;
out = Zero();
for (int a = 0; a < AdjointDimension; a++) {
gaussian(pRNG, ca);
generator(a, ta);
la = toComplex(ca) * ta;
out += la;
}
out *= ci;
}
static void FundamentalLieAlgebraMatrix(const LatticeAlgebraVector &h,
LatticeMatrix &out,
Real scale = 1.0) {
conformable(h, out);
GridBase *grid = out.Grid();
LatticeMatrix la(grid);
Matrix ta;
out = Zero();
for (int a = 0; a < AdjointDimension; a++) {
generator(a, ta);
la = peekColour(h, a) * timesI(ta) * scale;
out += la;
}
}
/*
* Fundamental rep gauge xform
*/
template<typename Fundamental,typename GaugeMat>
static void GaugeTransformFundamental( Fundamental &ferm, GaugeMat &g){
GridBase *grid = ferm._grid;
conformable(grid,g._grid);
ferm = g*ferm;
}
/*
* Adjoint rep gauge xform
*/
template<typename Gimpl>
static void GaugeTransform(typename Gimpl::GaugeField &Umu, typename Gimpl::GaugeLinkField &g){
GridBase *grid = Umu.Grid();
conformable(grid,g.Grid());
typename Gimpl::GaugeLinkField U(grid);
typename Gimpl::GaugeLinkField ag(grid); ag = adj(g);
for(int mu=0;mu<Nd;mu++){
U= PeekIndex<LorentzIndex>(Umu,mu);
U = g*U*Gimpl::CshiftLink(ag, mu, 1); //BC-aware
PokeIndex<LorentzIndex>(Umu,U,mu);
}
}
template<typename Gimpl>
static void GaugeTransform( std::vector<typename Gimpl::GaugeLinkField> &U, typename Gimpl::GaugeLinkField &g){
GridBase *grid = g.Grid();
typename Gimpl::GaugeLinkField ag(grid); ag = adj(g);
for(int mu=0;mu<Nd;mu++){
U[mu] = g*U[mu]*Gimpl::CshiftLink(ag, mu, 1); //BC-aware
}
}
template<typename Gimpl>
static void RandomGaugeTransform(GridParallelRNG &pRNG, typename Gimpl::GaugeField &Umu, typename Gimpl::GaugeLinkField &g){
LieRandomize(pRNG,g,1.0);
GaugeTransform<Gimpl>(Umu,g);
}
// Projects the algebra components a lattice matrix (of dimension ncol*ncol -1 )
// inverse operation: FundamentalLieAlgebraMatrix
static void projectOnAlgebra(LatticeAlgebraVector &h_out, const LatticeMatrix &in, Real scale = 1.0) {
conformable(h_out, in);
h_out = Zero();
Matrix Ta;
for (int a = 0; a < AdjointDimension; a++) {
generator(a, Ta);
pokeColour(h_out, - 2.0 * (trace(timesI(Ta) * in)) * scale, a);
}
}
template <typename GaugeField>
static void HotConfiguration(GridParallelRNG &pRNG, GaugeField &out) {
typedef typename GaugeField::vector_type vector_type;
typedef iSUnMatrix<vector_type> vMatrixType;
typedef Lattice<vMatrixType> LatticeMatrixType;
LatticeMatrixType Umu(out.Grid());
for (int mu = 0; mu < Nd; mu++) {
LieRandomize(pRNG, Umu, 1.0);
PokeIndex<LorentzIndex>(out, Umu, mu);
}
}
template<typename GaugeField>
static void TepidConfiguration(GridParallelRNG &pRNG,GaugeField &out){
typedef typename GaugeField::vector_type vector_type;
typedef iSUnMatrix<vector_type> vMatrixType;
typedef Lattice<vMatrixType> LatticeMatrixType;
LatticeMatrixType Umu(out.Grid());
for(int mu=0;mu<Nd;mu++){
LieRandomize(pRNG,Umu,0.01);
PokeIndex<LorentzIndex>(out,Umu,mu);
}
}
template<typename GaugeField>
static void ColdConfiguration(GaugeField &out){
typedef typename GaugeField::vector_type vector_type;
typedef iSUnMatrix<vector_type> vMatrixType;
typedef Lattice<vMatrixType> LatticeMatrixType;
LatticeMatrixType Umu(out.Grid());
Umu=1.0;
for(int mu=0;mu<Nd;mu++){
PokeIndex<LorentzIndex>(out,Umu,mu);
}
}
template<typename GaugeField>
static void ColdConfiguration(GridParallelRNG &pRNG,GaugeField &out){
ColdConfiguration(out);
}
template<typename LatticeMatrixType>
static void taProj( const LatticeMatrixType &in, LatticeMatrixType &out){
out = Ta(in);
}
template <typename LatticeMatrixType>
static void taExp(const LatticeMatrixType &x, LatticeMatrixType &ex) {
typedef typename LatticeMatrixType::scalar_type ComplexType;
LatticeMatrixType xn(x.Grid());
RealD nfac = 1.0;
xn = x;
ex = xn + ComplexType(1.0); // 1+x
// Do a 12th order exponentiation
for (int i = 2; i <= 12; ++i) {
nfac = nfac / RealD(i); // 1/2, 1/2.3 ...
xn = xn * x; // x2, x3,x4....
ex = ex + xn * nfac; // x2/2!, x3/3!....
}
}
};
template<int N>
LatticeComplexD Determinant(const Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu)
{
GridBase *grid=Umu.Grid();
auto lvol = grid->lSites();
LatticeComplexD ret(grid);
autoView(Umu_v,Umu,CpuRead);
autoView(ret_v,ret,CpuWrite);
thread_for(site,lvol,{
Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N);
Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor);
iScalar<iScalar<iMatrix<ComplexD, N> > > Us;
peekLocalSite(Us, Umu_v, lcoor);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
EigenU(i,j) = Us()()(i,j);
}}
ComplexD det = EigenU.determinant();
pokeLocalSite(det,ret_v,lcoor);
});
return ret;
}
template<int N>
static void ProjectSUn(Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu)
{
Umu = ProjectOnGroup(Umu);
auto det = Determinant(Umu);
det = conjugate(det);
for(int i=0;i<N;i++){
auto element = PeekIndex<ColourIndex>(Umu,N-1,i);
element = element * det;
PokeIndex<ColourIndex>(Umu,element,Nc-1,i);
}
}
template<int N>
static void ProjectSUn(Lattice<iVector<iScalar<iMatrix<vComplexD, N> >,Nd> > &U)
{
GridBase *grid=U.Grid();
// Reunitarise
for(int mu=0;mu<Nd;mu++){
auto Umu = PeekIndex<LorentzIndex>(U,mu);
Umu = ProjectOnGroup(Umu);
ProjectSUn(Umu);
PokeIndex<LorentzIndex>(U,Umu,mu);
}
}
// Explicit specialisation for SU(3).
// Explicit specialisation for SU(3).
static void
ProjectSU3 (Lattice<iScalar<iScalar<iMatrix<vComplexD, 3> > > > &Umu)
{
GridBase *grid=Umu.Grid();
const int x=0;
const int y=1;
const int z=2;
// Reunitarise
Umu = ProjectOnGroup(Umu);
autoView(Umu_v,Umu,CpuWrite);
thread_for(ss,grid->oSites(),{
auto cm = Umu_v[ss];
cm()()(2,x) = adj(cm()()(0,y)*cm()()(1,z)-cm()()(0,z)*cm()()(1,y)); //x= yz-zy
cm()()(2,y) = adj(cm()()(0,z)*cm()()(1,x)-cm()()(0,x)*cm()()(1,z)); //y= zx-xz
cm()()(2,z) = adj(cm()()(0,x)*cm()()(1,y)-cm()()(0,y)*cm()()(1,x)); //z= xy-yx
Umu_v[ss]=cm;
});
}
static void ProjectSU3(Lattice<iVector<iScalar<iMatrix<vComplexD, 3> >,Nd> > &U)
{
GridBase *grid=U.Grid();
// Reunitarise
for(int mu=0;mu<Nd;mu++){
auto Umu = PeekIndex<LorentzIndex>(U,mu);
Umu = ProjectOnGroup(Umu);
ProjectSU3(Umu);
PokeIndex<LorentzIndex>(U,Umu,mu);
}
}
typedef SU<2> SU2;
typedef SU<3> SU3;
typedef SU<4> SU4;
typedef SU<5> SU5;
typedef SU<Nc> FundamentalMatrices;
NAMESPACE_END(Grid);
#endif

580
Grid/qcd/utils/SUn.impl.h Normal file
View File

@@ -0,0 +1,580 @@
// This file is #included into the body of the class template definition of
// GaugeGroup. So, image there to be
//
// template <int ncolour, class group_name>
// class GaugeGroup {
//
// around it.
//
// Please note that the unconventional file extension makes sure that it
// doesn't get found by the scripts/filelist during bootstrapping.
private:
template <ONLY_IF_SU>
static int su2subgroups(GroupName::SU) { return (ncolour * (ncolour - 1)) / 2; }
////////////////////////////////////////////////////////////////////////
// There are N^2-1 generators for SU(N).
//
// We take a traceless hermitian generator basis as follows
//
// * Normalisation: trace ta tb = 1/2 delta_ab = T_F delta_ab
// T_F = 1/2 for SU(N) groups
//
// * Off diagonal
// - pairs of rows i1,i2 behaving like pauli matrices signma_x, sigma_y
//
// - there are (Nc-1-i1) slots for i2 on each row [ x 0 x ]
// direct count off each row
//
// - Sum of all pairs is Nc(Nc-1)/2: proof arithmetic series
//
// (Nc-1) + (Nc-2)+... 1 ==> Nc*(Nc-1)/2
// 1+ 2+ + + Nc-1
//
// - There are 2 x Nc (Nc-1)/ 2 of these = Nc^2 - Nc
//
// - We enumerate the row-col pairs.
// - for each row col pair there is a (sigma_x) and a (sigma_y) like
// generator
//
//
// t^a_ij = { in 0.. Nc(Nc-1)/2 -1} => 1/2(delta_{i,i1} delta_{j,i2} +
// delta_{i,i1} delta_{j,i2})
// t^a_ij = { in Nc(Nc-1)/2 ... Nc(Nc-1) - 1} => i/2( delta_{i,i1}
// delta_{j,i2} - i delta_{i,i1} delta_{j,i2})
//
// * Diagonal; must be traceless and normalised
// - Sequence is
// N (1,-1,0,0...)
// N (1, 1,-2,0...)
// N (1, 1, 1,-3,0...)
// N (1, 1, 1, 1,-4,0...)
//
// where 1/2 = N^2 (1+.. m^2)etc.... for the m-th diagonal generator
// NB this gives the famous SU3 result for su2 index 8
//
// N= sqrt(1/2 . 1/6 ) = 1/2 . 1/sqrt(3)
//
// ( 1 )
// ( 1 ) / sqrt(3) /2 = 1/2 lambda_8
// ( -2)
//
////////////////////////////////////////////////////////////////////////
template <class cplx, ONLY_IF_SU>
static void generator(int lieIndex, iGroupMatrix<cplx> &ta, GroupName::SU) {
// map lie index to which type of generator
int diagIndex;
int su2Index;
int sigxy;
int NNm1 = ncolour * (ncolour - 1);
if (lieIndex >= NNm1) {
diagIndex = lieIndex - NNm1;
generatorDiagonal(diagIndex, ta);
return;
}
sigxy = lieIndex & 0x1; // even or odd
su2Index = lieIndex >> 1;
if (sigxy)
generatorSigmaY(su2Index, ta);
else
generatorSigmaX(su2Index, ta);
}
template <class cplx, ONLY_IF_SU>
static void generatorSigmaY(int su2Index, iGroupMatrix<cplx> &ta) {
ta = Zero();
int i1, i2;
su2SubGroupIndex(i1, i2, su2Index);
ta()()(i1, i2) = 1.0;
ta()()(i2, i1) = 1.0;
ta = ta * 0.5;
}
template <class cplx, ONLY_IF_SU>
static void generatorSigmaX(int su2Index, iGroupMatrix<cplx> &ta) {
ta = Zero();
cplx i(0.0, 1.0);
int i1, i2;
su2SubGroupIndex(i1, i2, su2Index);
ta()()(i1, i2) = i;
ta()()(i2, i1) = -i;
ta = ta * 0.5;
}
template <class cplx, ONLY_IF_SU>
static void generatorDiagonal(int diagIndex, iGroupMatrix<cplx> &ta) {
// diag ({1, 1, ..., 1}(k-times), -k, 0, 0, ...)
ta = Zero();
int k = diagIndex + 1; // diagIndex starts from 0
for (int i = 0; i <= diagIndex; i++) { // k iterations
ta()()(i, i) = 1.0;
}
ta()()(k, k) = -k; // indexing starts from 0
RealD nrm = 1.0 / std::sqrt(2.0 * k * (k + 1));
ta = ta * nrm;
}
////////////////////////////////////////////////////////////////////////
// Map a su2 subgroup number to the pair of rows that are non zero
////////////////////////////////////////////////////////////////////////
static void su2SubGroupIndex(int &i1, int &i2, int su2_index, GroupName::SU) {
assert((su2_index >= 0) && (su2_index < (ncolour * (ncolour - 1)) / 2));
int spare = su2_index;
for (i1 = 0; spare >= (ncolour - 1 - i1); i1++) {
spare = spare - (ncolour - 1 - i1); // remove the Nc-1-i1 terms
}
i2 = i1 + 1 + spare;
}
public:
//////////////////////////////////////////////////////////////////////////////////////////
// Pull out a subgroup and project on to real coeffs x pauli basis
//////////////////////////////////////////////////////////////////////////////////////////
template <class vcplx, ONLY_IF_SU>
static void su2Extract(Lattice<iSinglet<vcplx> > &Determinant,
Lattice<iSU2Matrix<vcplx> > &subgroup,
const Lattice<iGroupMatrix<vcplx> > &source,
int su2_index) {
GridBase *grid(source.Grid());
conformable(subgroup, source);
conformable(subgroup, Determinant);
int i0, i1;
su2SubGroupIndex(i0, i1, su2_index);
autoView(subgroup_v, subgroup, AcceleratorWrite);
autoView(source_v, source, AcceleratorRead);
autoView(Determinant_v, Determinant, AcceleratorWrite);
accelerator_for(ss, grid->oSites(), 1, {
subgroup_v[ss]()()(0, 0) = source_v[ss]()()(i0, i0);
subgroup_v[ss]()()(0, 1) = source_v[ss]()()(i0, i1);
subgroup_v[ss]()()(1, 0) = source_v[ss]()()(i1, i0);
subgroup_v[ss]()()(1, 1) = source_v[ss]()()(i1, i1);
iSU2Matrix<vcplx> Sigma = subgroup_v[ss];
Sigma = Sigma - adj(Sigma) + trace(adj(Sigma));
subgroup_v[ss] = Sigma;
// this should be purely real
Determinant_v[ss] =
Sigma()()(0, 0) * Sigma()()(1, 1) - Sigma()()(0, 1) * Sigma()()(1, 0);
});
}
//////////////////////////////////////////////////////////////////////////////////////////
// Set matrix to one and insert a pauli subgroup
//////////////////////////////////////////////////////////////////////////////////////////
template <class vcplx, ONLY_IF_SU>
static void su2Insert(const Lattice<iSU2Matrix<vcplx> > &subgroup,
Lattice<iGroupMatrix<vcplx> > &dest, int su2_index) {
GridBase *grid(dest.Grid());
conformable(subgroup, dest);
int i0, i1;
su2SubGroupIndex(i0, i1, su2_index);
dest = 1.0; // start out with identity
autoView(dest_v, dest, AcceleratorWrite);
autoView(subgroup_v, subgroup, AcceleratorRead);
accelerator_for(ss, grid->oSites(), 1, {
dest_v[ss]()()(i0, i0) = subgroup_v[ss]()()(0, 0);
dest_v[ss]()()(i0, i1) = subgroup_v[ss]()()(0, 1);
dest_v[ss]()()(i1, i0) = subgroup_v[ss]()()(1, 0);
dest_v[ss]()()(i1, i1) = subgroup_v[ss]()()(1, 1);
});
}
///////////////////////////////////////////////
// Generate e^{ Re Tr Staple Link} dlink
//
// *** Note Staple should be appropriate linear compbination between all
// staples.
// *** If already by beta pass coefficient 1.0.
// *** This routine applies the additional 1/Nc factor that comes after trace
// in action.
//
///////////////////////////////////////////////
template <ONLY_IF_SU>
static void SubGroupHeatBath(
GridSerialRNG &sRNG, GridParallelRNG &pRNG,
RealD beta, // coeff multiplying staple in action (with no 1/Nc)
LatticeMatrix &link,
const LatticeMatrix &barestaple, // multiplied by action coeffs so th
int su2_subgroup, int nheatbath, LatticeInteger &wheremask) {
GridBase *grid = link.Grid();
const RealD twopi = 2.0 * M_PI;
LatticeMatrix staple(grid);
staple = barestaple * (beta / ncolour);
LatticeMatrix V(grid);
V = link * staple;
// Subgroup manipulation in the lie algebra space
LatticeSU2Matrix u(
grid); // Kennedy pendleton "u" real projected normalised Sigma
LatticeSU2Matrix uinv(grid);
LatticeSU2Matrix ua(grid); // a in pauli form
LatticeSU2Matrix b(grid); // rotated matrix after hb
// Some handy constant fields
LatticeComplex ones(grid);
ones = 1.0;
LatticeComplex zeros(grid);
zeros = Zero();
LatticeReal rones(grid);
rones = 1.0;
LatticeReal rzeros(grid);
rzeros = Zero();
LatticeComplex udet(grid); // determinant of real(staple)
LatticeInteger mask_true(grid);
mask_true = 1;
LatticeInteger mask_false(grid);
mask_false = 0;
/*
PLB 156 P393 (1985) (Kennedy and Pendleton)
Note: absorb "beta" into the def of sigma compared to KP paper; staple
passed to this routine has "beta" already multiplied in
Action linear in links h and of form:
beta S = beta Sum_p (1 - 1/Nc Re Tr Plaq )
Writing Sigma = 1/Nc (beta Sigma') where sum over staples is "Sigma' "
beta S = const - beta/Nc Re Tr h Sigma'
= const - Re Tr h Sigma
Decompose h and Sigma into (1, sigma_j) ; h_i real, h^2=1, Sigma_i complex
arbitrary.
Tr h Sigma = h_i Sigma_j Tr (sigma_i sigma_j) = h_i Sigma_j 2 delta_ij
Re Tr h Sigma = 2 h_j Re Sigma_j
Normalised re Sigma_j = xi u_j
With u_j a unit vector and U can be in SU(2);
Re Tr h Sigma = 2 h_j Re Sigma_j = 2 xi (h.u)
4xi^2 = Det [ Sig - Sig^dag + 1 Tr Sigdag]
u = 1/2xi [ Sig - Sig^dag + 1 Tr Sigdag]
xi = sqrt(Det)/2;
Write a= u h in SU(2); a has pauli decomp a_j;
Note: Product b' xi is unvariant because scaling Sigma leaves
normalised vector "u" fixed; Can rescale Sigma so b' = 1.
*/
////////////////////////////////////////////////////////
// Real part of Pauli decomposition
// Note a subgroup can project to zero in cold start
////////////////////////////////////////////////////////
su2Extract(udet, u, V, su2_subgroup);
//////////////////////////////////////////////////////
// Normalising this vector if possible; else identity
//////////////////////////////////////////////////////
LatticeComplex xi(grid);
LatticeSU2Matrix lident(grid);
SU2Matrix ident = Complex(1.0);
SU2Matrix pauli1;
GaugeGroup<2, GroupName::SU>::generator(0, pauli1);
SU2Matrix pauli2;
GaugeGroup<2, GroupName::SU>::generator(1, pauli2);
SU2Matrix pauli3;
GaugeGroup<2, GroupName::SU>::generator(2, pauli3);
pauli1 = timesI(pauli1) * 2.0;
pauli2 = timesI(pauli2) * 2.0;
pauli3 = timesI(pauli3) * 2.0;
LatticeComplex cone(grid);
LatticeReal adet(grid);
adet = abs(toReal(udet));
lident = Complex(1.0);
cone = Complex(1.0);
Real machine_epsilon = 1.0e-7;
u = where(adet > machine_epsilon, u, lident);
udet = where(adet > machine_epsilon, udet, cone);
xi = 0.5 * sqrt(udet); // 4xi^2 = Det [ Sig - Sig^dag + 1 Tr Sigdag]
u = 0.5 * u * pow(xi, -1.0); // u = 1/2xi [ Sig - Sig^dag + 1 Tr Sigdag]
// Debug test for sanity
uinv = adj(u);
b = u * uinv - 1.0;
assert(norm2(b) < 1.0e-4);
/*
Measure: Haar measure dh has d^4a delta(1-|a^2|)
In polars:
da = da0 r^2 sin theta dr dtheta dphi delta( 1 - r^2 -a0^2)
= da0 r^2 sin theta dr dtheta dphi delta( (sqrt(1-a0^) - r)(sqrt(1-a0^) +
r) )
= da0 r/2 sin theta dr dtheta dphi delta( (sqrt(1-a0^) - r) )
Action factor Q(h) dh = e^-S[h] dh = e^{ xi Tr uh} dh // beta
enters through xi = e^{2 xi (h.u)} dh = e^{2 xi h0u0}.e^{2 xi h1u1}.e^{2
xi h2u2}.e^{2 xi h3u3} dh
Therefore for each site, take xi for that site
i) generate |a0|<1 with dist
(1-a0^2)^0.5 e^{2 xi a0 } da0
Take alpha = 2 xi = 2 xi [ recall 2 beta/Nc unmod staple norm];
hence 2.0/Nc factor in Chroma ] A. Generate two uniformly distributed
pseudo-random numbers R and R', R'', R''' in the unit interval; B. Set X =
-(ln R)/alpha, X' =-(ln R')/alpha; C. Set C = cos^2(2pi R"), with R"
another uniform random number in [0,1] ; D. Set A = XC; E. Let d = X'+A;
F. If R'''^2 :> 1 - 0.5 d, go back to A;
G. Set a0 = 1 - d;
Note that in step D setting B ~ X - A and using B in place of A in step E
will generate a second independent a 0 value.
*/
/////////////////////////////////////////////////////////
// count the number of sites by picking "1"'s out of hat
/////////////////////////////////////////////////////////
Integer hit = 0;
LatticeReal rtmp(grid);
rtmp = where(wheremask, rones, rzeros);
RealD numSites = sum(rtmp);
RealD numAccepted;
LatticeInteger Accepted(grid);
Accepted = Zero();
LatticeInteger newlyAccepted(grid);
std::vector<LatticeReal> xr(4, grid);
std::vector<LatticeReal> a(4, grid);
LatticeReal d(grid);
d = Zero();
LatticeReal alpha(grid);
// std::cout<<GridLogMessage<<"xi "<<xi <<std::endl;
xi = 2.0 * xi;
alpha = toReal(xi);
do {
// A. Generate two uniformly distributed pseudo-random numbers R and R',
// R'', R''' in the unit interval;
random(pRNG, xr[0]);
random(pRNG, xr[1]);
random(pRNG, xr[2]);
random(pRNG, xr[3]);
// B. Set X = - ln R/alpha, X' = -ln R'/alpha
xr[1] = -log(xr[1]) / alpha;
xr[2] = -log(xr[2]) / alpha;
// C. Set C = cos^2(2piR'')
xr[3] = cos(xr[3] * twopi);
xr[3] = xr[3] * xr[3];
LatticeReal xrsq(grid);
// D. Set A = XC;
// E. Let d = X'+A;
xrsq = xr[2] + xr[1] * xr[3];
d = where(Accepted, d, xr[2] + xr[1] * xr[3]);
// F. If R'''^2 :> 1 - 0.5 d, go back to A;
LatticeReal thresh(grid);
thresh = 1.0 - d * 0.5;
xrsq = xr[0] * xr[0];
LatticeInteger ione(grid);
ione = 1;
LatticeInteger izero(grid);
izero = Zero();
newlyAccepted = where(xrsq < thresh, ione, izero);
Accepted = where(newlyAccepted, newlyAccepted, Accepted);
Accepted = where(wheremask, Accepted, izero);
// FIXME need an iSum for integer to avoid overload on return type??
rtmp = where(Accepted, rones, rzeros);
numAccepted = sum(rtmp);
hit++;
} while ((numAccepted < numSites) && (hit < nheatbath));
// G. Set a0 = 1 - d;
a[0] = Zero();
a[0] = where(wheremask, 1.0 - d, a[0]);
//////////////////////////////////////////
// ii) generate a_i uniform on two sphere radius (1-a0^2)^0.5
//////////////////////////////////////////
LatticeReal a123mag(grid);
a123mag = sqrt(abs(1.0 - a[0] * a[0]));
LatticeReal cos_theta(grid);
LatticeReal sin_theta(grid);
LatticeReal phi(grid);
random(pRNG, phi);
phi = phi * twopi; // uniform in [0,2pi]
random(pRNG, cos_theta);
cos_theta = (cos_theta * 2.0) - 1.0; // uniform in [-1,1]
sin_theta = sqrt(abs(1.0 - cos_theta * cos_theta));
a[1] = a123mag * sin_theta * cos(phi);
a[2] = a123mag * sin_theta * sin(phi);
a[3] = a123mag * cos_theta;
ua = toComplex(a[0]) * ident + toComplex(a[1]) * pauli1 +
toComplex(a[2]) * pauli2 + toComplex(a[3]) * pauli3;
b = 1.0;
b = where(wheremask, uinv * ua, b);
su2Insert(b, V, su2_subgroup);
// mask the assignment back based on Accptance
link = where(Accepted, V * link, link);
//////////////////////////////
// Debug Checks
// SU2 check
LatticeSU2Matrix check(grid); // rotated matrix after hb
u = Zero();
check = ua * adj(ua) - 1.0;
check = where(Accepted, check, u);
assert(norm2(check) < 1.0e-4);
check = b * adj(b) - 1.0;
check = where(Accepted, check, u);
assert(norm2(check) < 1.0e-4);
LatticeMatrix Vcheck(grid);
Vcheck = Zero();
Vcheck = where(Accepted, V * adj(V) - 1.0, Vcheck);
// std::cout<<GridLogMessage << "SU3 check " <<norm2(Vcheck)<<std::endl;
assert(norm2(Vcheck) < 1.0e-4);
// Verify the link stays in SU(3)
// std::cout<<GridLogMessage <<"Checking the modified link"<<std::endl;
Vcheck = link * adj(link) - 1.0;
assert(norm2(Vcheck) < 1.0e-4);
/////////////////////////////////
}
template <ONLY_IF_SU>
static void testGenerators(GroupName::SU) {
Matrix ta;
Matrix tb;
std::cout << GridLogMessage
<< "Fundamental - Checking trace ta tb is 0.5 delta_ab"
<< std::endl;
for (int a = 0; a < AdjointDimension; a++) {
for (int b = 0; b < AdjointDimension; b++) {
generator(a, ta);
generator(b, tb);
Complex tr = TensorRemove(trace(ta * tb));
std::cout << GridLogMessage << "(" << a << "," << b << ") = " << tr
<< std::endl;
if (a == b) assert(abs(tr - Complex(0.5)) < 1.0e-6);
if (a != b) assert(abs(tr) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
}
std::cout << GridLogMessage << "Fundamental - Checking if hermitian"
<< std::endl;
for (int a = 0; a < AdjointDimension; a++) {
generator(a, ta);
std::cout << GridLogMessage << a << std::endl;
assert(norm2(ta - adj(ta)) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage << "Fundamental - Checking if traceless"
<< std::endl;
for (int a = 0; a < AdjointDimension; a++) {
generator(a, ta);
Complex tr = TensorRemove(trace(ta));
std::cout << GridLogMessage << a << " " << std::endl;
assert(abs(tr) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
}
template <int N, class vtype>
static Lattice<iScalar<iScalar<iMatrix<vtype, N> > > >
ProjectOnGeneralGroup(const Lattice<iScalar<iScalar<iMatrix<vtype, N> > > > &Umu, GroupName::SU) {
return ProjectOnGroup(Umu);
}
template <class vtype>
accelerator_inline static iScalar<vtype> ProjectOnGeneralGroup(const iScalar<vtype> &r, GroupName::SU) {
return ProjectOnGroup(r);
}
template <class vtype, int N>
accelerator_inline static iVector<vtype,N> ProjectOnGeneralGroup(const iVector<vtype,N> &r, GroupName::SU) {
return ProjectOnGroup(r);
}
template <class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
accelerator_inline static iMatrix<vtype,N> ProjectOnGeneralGroup(const iMatrix<vtype,N> &arg, GroupName::SU) {
return ProjectOnGroup(arg);
}
template <typename LatticeMatrixType>
static void taProj(const LatticeMatrixType &in, LatticeMatrixType &out, GroupName::SU) {
out = Ta(in);
}
/*
* Fundamental rep gauge xform
*/
template<typename Fundamental,typename GaugeMat>
static void GaugeTransformFundamental( Fundamental &ferm, GaugeMat &g){
GridBase *grid = ferm._grid;
conformable(grid,g._grid);
ferm = g*ferm;
}
/*
* Adjoint rep gauge xform
*/
template<typename Gimpl>
static void GaugeTransform(typename Gimpl::GaugeField &Umu, typename Gimpl::GaugeLinkField &g){
GridBase *grid = Umu.Grid();
conformable(grid,g.Grid());
typename Gimpl::GaugeLinkField U(grid);
typename Gimpl::GaugeLinkField ag(grid); ag = adj(g);
for(int mu=0;mu<Nd;mu++){
U= PeekIndex<LorentzIndex>(Umu,mu);
U = g*U*Gimpl::CshiftLink(ag, mu, 1); //BC-aware
PokeIndex<LorentzIndex>(Umu,U,mu);
}
}
template<typename Gimpl>
static void GaugeTransform( std::vector<typename Gimpl::GaugeLinkField> &U, typename Gimpl::GaugeLinkField &g){
GridBase *grid = g.Grid();
typename Gimpl::GaugeLinkField ag(grid); ag = adj(g);
for(int mu=0;mu<Nd;mu++){
U[mu] = g*U[mu]*Gimpl::CshiftLink(ag, mu, 1); //BC-aware
}
}
template<typename Gimpl>
static void RandomGaugeTransform(GridParallelRNG &pRNG, typename Gimpl::GaugeField &Umu, typename Gimpl::GaugeLinkField &g){
LieRandomize(pRNG,g,1.0);
GaugeTransform<Gimpl>(Umu,g);
}

View File

@@ -52,13 +52,18 @@ public:
typedef Lattice<iVector<iScalar<iMatrix<vComplexD, Dimension> >, Nd> > LatticeAdjFieldD;
template <typename vtype>
using iSUnMatrix = iScalar<iScalar<iMatrix<vtype, ncolour> > >;
typedef Lattice<iScalar<iScalar<iVector<vComplex, Dimension> > > > LatticeAdjVector;
template <class cplx>
static void generator(int Index, iSUnAdjointMatrix<cplx> &iAdjTa) {
// returns i(T_Adj)^index necessary for the projectors
// see definitions above
iAdjTa = Zero();
Vector<typename SU<ncolour>::template iSUnMatrix<cplx> > ta(ncolour * ncolour - 1);
typename SU<ncolour>::template iSUnMatrix<cplx> tmp;
Vector<iSUnMatrix<cplx> > ta(ncolour * ncolour - 1);
iSUnMatrix<cplx> tmp;
// FIXME not very efficient to get all the generators everytime
for (int a = 0; a < Dimension; a++) SU<ncolour>::generator(a, ta[a]);
@@ -66,8 +71,7 @@ public:
for (int a = 0; a < Dimension; a++) {
tmp = ta[a] * ta[Index] - ta[Index] * ta[a];
for (int b = 0; b < (ncolour * ncolour - 1); b++) {
typename SU<ncolour>::template iSUnMatrix<cplx> tmp1 =
2.0 * tmp * ta[b]; // 2.0 from the normalization
iSUnMatrix<cplx> tmp1 = 2.0 * tmp * ta[b]; // 2.0 from the normalization
Complex iTr = TensorRemove(timesI(trace(tmp1)));
//iAdjTa()()(b, a) = iTr;
iAdjTa()()(a, b) = iTr;
@@ -133,8 +137,7 @@ public:
for (int a = 0; a < Dimension; a++) {
generator(a, iTa);
LatticeComplex tmp = real(trace(iTa * in)) * coefficient;
pokeColour(h_out, tmp, a);
pokeColour(h_out, real(trace(iTa * in)) * coefficient, a);
}
}

View File

@@ -1,273 +0,0 @@
////////////////////////////////////////////////////////////////////////
//
// * Two index representation generators
//
// * Normalisation for the fundamental generators:
// trace ta tb = 1/2 delta_ab = T_F delta_ab
// T_F = 1/2 for SU(N) groups
//
//
// base for NxN two index (anti-symmetric) matrices
// normalized to 1 (d_ij is the kroenecker delta)
//
// (e^(ij)_{kl} = 1 / sqrt(2) (d_ik d_jl +/- d_jk d_il)
//
// Then the generators are written as
//
// (iT_a)^(ij)(lk) = i * ( tr[e^(ij)^dag e^(lk) T^trasp_a] +
// tr[e^(lk)e^(ij)^dag T_a] ) //
//
//
////////////////////////////////////////////////////////////////////////
// Authors: David Preti, Guido Cossu
#ifndef QCD_UTIL_SUN2INDEX_H
#define QCD_UTIL_SUN2INDEX_H
NAMESPACE_BEGIN(Grid);
enum TwoIndexSymmetry { Symmetric = 1, AntiSymmetric = -1 };
inline Real delta(int a, int b) { return (a == b) ? 1.0 : 0.0; }
template <int ncolour, TwoIndexSymmetry S>
class SU_TwoIndex : public SU<ncolour> {
public:
static const int Dimension = ncolour * (ncolour + S) / 2;
static const int NumGenerators = SU<ncolour>::AdjointDimension;
template <typename vtype>
using iSUnTwoIndexMatrix = iScalar<iScalar<iMatrix<vtype, Dimension> > >;
typedef iSUnTwoIndexMatrix<Complex> TIMatrix;
typedef iSUnTwoIndexMatrix<ComplexF> TIMatrixF;
typedef iSUnTwoIndexMatrix<ComplexD> TIMatrixD;
typedef iSUnTwoIndexMatrix<vComplex> vTIMatrix;
typedef iSUnTwoIndexMatrix<vComplexF> vTIMatrixF;
typedef iSUnTwoIndexMatrix<vComplexD> vTIMatrixD;
typedef Lattice<vTIMatrix> LatticeTwoIndexMatrix;
typedef Lattice<vTIMatrixF> LatticeTwoIndexMatrixF;
typedef Lattice<vTIMatrixD> LatticeTwoIndexMatrixD;
typedef Lattice<iVector<iScalar<iMatrix<vComplex, Dimension> >, Nd> >
LatticeTwoIndexField;
typedef Lattice<iVector<iScalar<iMatrix<vComplexF, Dimension> >, Nd> >
LatticeTwoIndexFieldF;
typedef Lattice<iVector<iScalar<iMatrix<vComplexD, Dimension> >, Nd> >
LatticeTwoIndexFieldD;
template <typename vtype>
using iSUnMatrix = iScalar<iScalar<iMatrix<vtype, ncolour> > >;
typedef iSUnMatrix<Complex> Matrix;
typedef iSUnMatrix<ComplexF> MatrixF;
typedef iSUnMatrix<ComplexD> MatrixD;
template <class cplx>
static void base(int Index, iSUnMatrix<cplx> &eij) {
// returns (e)^(ij)_{kl} necessary for change of base U_F -> U_R
assert(Index < NumGenerators);
eij = Zero();
// for the linearisation of the 2 indexes
static int a[ncolour * (ncolour - 1) / 2][2]; // store the a <-> i,j
static bool filled = false;
if (!filled) {
int counter = 0;
for (int i = 1; i < ncolour; i++) {
for (int j = 0; j < i; j++) {
a[counter][0] = i;
a[counter][1] = j;
counter++;
}
}
filled = true;
}
if (Index < ncolour * (ncolour - 1) / 2) {
baseOffDiagonal(a[Index][0], a[Index][1], eij);
} else {
baseDiagonal(Index, eij);
}
}
template <class cplx>
static void baseDiagonal(int Index, iSUnMatrix<cplx> &eij) {
eij = Zero();
eij()()(Index - ncolour * (ncolour - 1) / 2,
Index - ncolour * (ncolour - 1) / 2) = 1.0;
}
template <class cplx>
static void baseOffDiagonal(int i, int j, iSUnMatrix<cplx> &eij) {
eij = Zero();
for (int k = 0; k < ncolour; k++)
for (int l = 0; l < ncolour; l++)
eij()()(l, k) = delta(i, k) * delta(j, l) +
S * delta(j, k) * delta(i, l);
RealD nrm = 1. / std::sqrt(2.0);
eij = eij * nrm;
}
static void printBase(void) {
for (int gen = 0; gen < Dimension; gen++) {
Matrix tmp;
base(gen, tmp);
std::cout << GridLogMessage << "Nc = " << ncolour << " t_" << gen
<< std::endl;
std::cout << GridLogMessage << tmp << std::endl;
}
}
template <class cplx>
static void generator(int Index, iSUnTwoIndexMatrix<cplx> &i2indTa) {
Vector<typename SU<ncolour>::template iSUnMatrix<cplx> > ta(
ncolour * ncolour - 1);
Vector<typename SU<ncolour>::template iSUnMatrix<cplx> > eij(Dimension);
typename SU<ncolour>::template iSUnMatrix<cplx> tmp;
i2indTa = Zero();
for (int a = 0; a < ncolour * ncolour - 1; a++)
SU<ncolour>::generator(a, ta[a]);
for (int a = 0; a < Dimension; a++) base(a, eij[a]);
for (int a = 0; a < Dimension; a++) {
tmp = transpose(ta[Index]) * adj(eij[a]) + adj(eij[a]) * ta[Index];
for (int b = 0; b < Dimension; b++) {
typename SU<ncolour>::template iSUnMatrix<cplx> tmp1 =
tmp * eij[b];
Complex iTr = TensorRemove(timesI(trace(tmp1)));
i2indTa()()(a, b) = iTr;
}
}
}
static void printGenerators(void) {
for (int gen = 0; gen < ncolour * ncolour - 1; gen++) {
TIMatrix i2indTa;
generator(gen, i2indTa);
std::cout << GridLogMessage << "Nc = " << ncolour << " t_" << gen
<< std::endl;
std::cout << GridLogMessage << i2indTa << std::endl;
}
}
static void testGenerators(void) {
TIMatrix i2indTa, i2indTb;
std::cout << GridLogMessage << "2IndexRep - Checking if traceless"
<< std::endl;
for (int a = 0; a < ncolour * ncolour - 1; a++) {
generator(a, i2indTa);
std::cout << GridLogMessage << a << std::endl;
assert(norm2(trace(i2indTa)) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage << "2IndexRep - Checking if antihermitean"
<< std::endl;
for (int a = 0; a < ncolour * ncolour - 1; a++) {
generator(a, i2indTa);
std::cout << GridLogMessage << a << std::endl;
assert(norm2(adj(i2indTa) + i2indTa) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage
<< "2IndexRep - Checking Tr[Ta*Tb]=delta(a,b)*(N +- 2)/2"
<< std::endl;
for (int a = 0; a < ncolour * ncolour - 1; a++) {
for (int b = 0; b < ncolour * ncolour - 1; b++) {
generator(a, i2indTa);
generator(b, i2indTb);
// generator returns iTa, so we need a minus sign here
Complex Tr = -TensorRemove(trace(i2indTa * i2indTb));
std::cout << GridLogMessage << "a=" << a << "b=" << b << "Tr=" << Tr
<< std::endl;
}
}
std::cout << GridLogMessage << std::endl;
}
static void TwoIndexLieAlgebraMatrix(
const typename SU<ncolour>::LatticeAlgebraVector &h,
LatticeTwoIndexMatrix &out, Real scale = 1.0) {
conformable(h, out);
GridBase *grid = out.Grid();
LatticeTwoIndexMatrix la(grid);
TIMatrix i2indTa;
out = Zero();
for (int a = 0; a < ncolour * ncolour - 1; a++) {
generator(a, i2indTa);
la = peekColour(h, a) * i2indTa;
out += la;
}
out *= scale;
}
// Projects the algebra components
// of a lattice matrix ( of dimension ncol*ncol -1 )
static void projectOnAlgebra(
typename SU<ncolour>::LatticeAlgebraVector &h_out,
const LatticeTwoIndexMatrix &in, Real scale = 1.0) {
conformable(h_out, in);
h_out = Zero();
TIMatrix i2indTa;
Real coefficient = -2.0 / (ncolour + 2 * S) * scale;
// 2/(Nc +/- 2) for the normalization of the trace in the two index rep
for (int a = 0; a < ncolour * ncolour - 1; a++) {
generator(a, i2indTa);
auto tmp = real(trace(i2indTa * in)) * coefficient;
pokeColour(h_out, tmp, a);
}
}
// a projector that keeps the generators stored to avoid the overhead of
// recomputing them
static void projector(typename SU<ncolour>::LatticeAlgebraVector &h_out,
const LatticeTwoIndexMatrix &in, Real scale = 1.0) {
conformable(h_out, in);
// to store the generators
static std::vector<TIMatrix> i2indTa(ncolour * ncolour -1);
h_out = Zero();
static bool precalculated = false;
if (!precalculated) {
precalculated = true;
for (int a = 0; a < ncolour * ncolour - 1; a++) generator(a, i2indTa[a]);
}
Real coefficient =
-2.0 / (ncolour + 2 * S) * scale; // 2/(Nc +/- 2) for the normalization
// of the trace in the two index rep
for (int a = 0; a < ncolour * ncolour - 1; a++) {
auto tmp = real(trace(i2indTa[a] * in)) * coefficient;
pokeColour(h_out, tmp, a);
}
}
};
// Some useful type names
typedef SU_TwoIndex<Nc, Symmetric> TwoIndexSymmMatrices;
typedef SU_TwoIndex<Nc, AntiSymmetric> TwoIndexAntiSymmMatrices;
typedef SU_TwoIndex<2, Symmetric> SU2TwoIndexSymm;
typedef SU_TwoIndex<3, Symmetric> SU3TwoIndexSymm;
typedef SU_TwoIndex<4, Symmetric> SU4TwoIndexSymm;
typedef SU_TwoIndex<5, Symmetric> SU5TwoIndexSymm;
typedef SU_TwoIndex<2, AntiSymmetric> SU2TwoIndexAntiSymm;
typedef SU_TwoIndex<3, AntiSymmetric> SU3TwoIndexAntiSymm;
typedef SU_TwoIndex<4, AntiSymmetric> SU4TwoIndexAntiSymm;
typedef SU_TwoIndex<5, AntiSymmetric> SU5TwoIndexAntiSymm;
NAMESPACE_END(Grid);
#endif

317
Grid/qcd/utils/Sp2n.impl.h Normal file
View File

@@ -0,0 +1,317 @@
// This file is #included into the body of the class template definition of
// GaugeGroup. So, image there to be
//
// template <int ncolour, class group_name>
// class GaugeGroup {
//
// around it.
//
// Please note that the unconventional file extension makes sure that it
// doesn't get found by the scripts/filelist during bootstrapping.
private:
template <ONLY_IF_Sp>
static int su2subgroups(GroupName::Sp) { return (ncolour/2 * (ncolour/2 - 1)) / 2; }
// Sp(2N) has N(2N+1) = 2N^2+N generators
//
// normalise the generators such that
// Trace ( Ta Tb) = 1/2 delta_ab
//
// N generators in the cartan, 2N^2 off
// off diagonal:
// there are 6 types named a,b,c,d and w,z
// abcd are N(N-1)/2 each while wz are N each
template <class cplx, ONLY_IF_Sp>
static void generator(int lieIndex, iGroupMatrix<cplx> &ta, GroupName::Sp) {
// map lie index into type of generators: diagonal, abcd type, wz type
const int nsp = ncolour/2;
int diagIndex;
int aIndex, bIndex, cIndex, dIndex;
int wIndex, zIndex; // a,b,c,d are N(N-1)/2 and w,z are N
const int mod = nsp * (nsp - 1) * 0.5;
const int offdiag =
2 * nsp * nsp; // number of generators not in the cartan subalgebra
const int wmod = 4 * mod;
const int zmod = wmod + nsp;
if (lieIndex >= offdiag) {
diagIndex = lieIndex - offdiag; // 0, ... ,N-1
// std::cout << GridLogMessage << "diag type " << std::endl;
generatorDiagtype(diagIndex, ta);
return;
}
if ((lieIndex >= wmod) && (lieIndex < zmod)) {
// std::cout << GridLogMessage << "w type " << std::endl;
wIndex = lieIndex - wmod; // 0, ... ,N-1
generatorWtype(wIndex, ta);
return;
}
if ((lieIndex >= zmod) && (lieIndex < offdiag)) {
// std::cout << GridLogMessage << "z type " << std::endl;
// std::cout << GridLogMessage << "lie index " << lieIndex << std::endl;
// std::cout << GridLogMessage << "z mod " << zmod << std::endl;
zIndex = lieIndex - zmod; // 0, ... ,N-1
generatorZtype(zIndex, ta);
return;
}
if (lieIndex < mod) { // atype 0, ... , N(N-1)/2=mod
// std::cout << GridLogMessage << "a type " << std::endl;
aIndex = lieIndex;
// std::cout << GridLogMessage << "a indx " << aIndex << std::endl;
generatorAtype(aIndex, ta);
return;
}
if ((lieIndex >= mod) && lieIndex < 2 * mod) { // btype mod, ... , 2mod-1
// std::cout << GridLogMessage << "b type " << std::endl;
bIndex = lieIndex - mod;
generatorBtype(bIndex, ta);
return;
}
if ((lieIndex >= 2 * mod) &&
lieIndex < 3 * mod) { // ctype 2mod, ... , 3mod-1
// std::cout << GridLogMessage << "c type " << std::endl;
cIndex = lieIndex - 2 * mod;
generatorCtype(cIndex, ta);
return;
}
if ((lieIndex >= 3 * mod) &&
lieIndex < wmod) { // ctype 3mod, ... , 4mod-1 = wmod-1
// std::cout << GridLogMessage << "d type " << std::endl;
dIndex = lieIndex - 3 * mod;
generatorDtype(dIndex, ta);
return;
}
} // end of generator
template <class cplx, ONLY_IF_Sp>
static void generatorDiagtype(int diagIndex, iGroupMatrix<cplx> &ta) {
// ta(i,i) = - ta(i+N,i+N) = 1/2 for each i index of the cartan subalgebra
const int nsp=ncolour/2;
ta = Zero();
RealD nrm = 1.0 / 2;
ta()()(diagIndex, diagIndex) = nrm;
ta()()(diagIndex + nsp, diagIndex + nsp) = -nrm;
}
template <class cplx, ONLY_IF_Sp>
static void generatorAtype(int aIndex, iGroupMatrix<cplx> &ta) {
// ta(i,j) = ta(j,i) = -ta(i+N,j+N) = -ta(j+N,i+N) = 1 / 2 sqrt(2)
// with i<j and i=0,...,N-2
// follows that j=i+1, ... , N
int i1, i2;
const int nsp=ncolour/2;
ta = Zero();
RealD nrm = 1 / (2 * std::sqrt(2));
su2SubGroupIndex(i1, i2, aIndex);
ta()()(i1, i2) = 1;
ta()()(i2, i1) = 1;
ta()()(i1 + nsp, i2 + nsp) = -1;
ta()()(i2 + nsp, i1 + nsp) = -1;
ta = ta * nrm;
}
template <class cplx, ONLY_IF_Sp>
static void generatorBtype(int bIndex, iGroupMatrix<cplx> &ta) {
// ta(i,j) = -ta(j,i) = ta(i+N,j+N) = -ta(j+N,i+N) = i / 1/ 2 sqrt(2)
// with i<j and i=0,...,N-2
// follows that j=i+1, ... , N-1
const int nsp=ncolour/2;
int i1, i2;
ta = Zero();
cplx i(0.0, 1.0);
RealD nrm = 1 / (2 * std::sqrt(2));
su2SubGroupIndex(i1, i2, bIndex);
ta()()(i1, i2) = i;
ta()()(i2, i1) = -i;
ta()()(i1 + nsp, i2 + nsp) = i;
ta()()(i2 + nsp, i1 + nsp) = -i;
ta = ta * nrm;
}
template <class cplx, ONLY_IF_Sp>
static void generatorCtype(int cIndex, iGroupMatrix<cplx> &ta) {
// ta(i,j+N) = ta(j,i+N) = ta(i+N,j) = ta(j+N,i) = 1 / 2 sqrt(2)
const int nsp=ncolour/2;
int i1, i2;
ta = Zero();
RealD nrm = 1 / (2 * std::sqrt(2));
su2SubGroupIndex(i1, i2, cIndex);
ta()()(i1, i2 + nsp) = 1;
ta()()(i2, i1 + nsp) = 1;
ta()()(i1 + nsp, i2) = 1;
ta()()(i2 + nsp, i1) = 1;
ta = ta * nrm;
}
template <class cplx, ONLY_IF_Sp>
static void generatorDtype(int dIndex, iGroupMatrix<cplx> &ta) {
// ta(i,j+N) = ta(j,i+N) = -ta(i+N,j) = -ta(j+N,i) = i / 2 sqrt(2)
const int nsp=ncolour/2;
int i1, i2;
ta = Zero();
cplx i(0.0, 1.0);
RealD nrm = 1 / (2 * std::sqrt(2));
su2SubGroupIndex(i1, i2, dIndex);
ta()()(i1, i2 + nsp) = i;
ta()()(i2, i1 + nsp) = i;
ta()()(i1 + nsp, i2) = -i;
ta()()(i2 + nsp, i1) = -i;
ta = ta * nrm;
}
template <class cplx, ONLY_IF_Sp>
static void generatorWtype(int wIndex, iGroupMatrix<cplx> &ta) {
// ta(i,i+N) = ta(i+N,i) = 1/2
const int nsp=ncolour/2;
ta = Zero();
RealD nrm = 1.0 / 2; // check
ta()()(wIndex, wIndex + nsp) = 1;
ta()()(wIndex + nsp, wIndex) = 1;
ta = ta * nrm;
}
template <class cplx, ONLY_IF_Sp>
static void generatorZtype(int zIndex, iGroupMatrix<cplx> &ta) {
// ta(i,i+N) = - ta(i+N,i) = i/2
const int nsp=ncolour/2;
ta = Zero();
RealD nrm = 1.0 / 2; // check
cplx i(0.0, 1.0);
ta()()(zIndex, zIndex + nsp) = i;
ta()()(zIndex + nsp, zIndex) = -i;
ta = ta * nrm;
}
////////////////////////////////////////////////////////////////////////
// Map a su2 subgroup number to the pair of rows that are non zero
////////////////////////////////////////////////////////////////////////
template <ONLY_IF_Sp>
static void su2SubGroupIndex(int &i1, int &i2, int su2_index, GroupName::Sp) {
const int nsp=ncolour/2;
assert((su2_index >= 0) && (su2_index < (nsp * (nsp - 1)) / 2));
int spare = su2_index;
for (i1 = 0; spare >= (nsp - 1 - i1); i1++) {
spare = spare - (nsp - 1 - i1); // remove the Nc-1-i1 terms
}
i2 = i1 + 1 + spare;
}
static void testGenerators(GroupName::Sp) {
Matrix ta;
Matrix tb;
std::cout << GridLogMessage
<< "Fundamental - Checking trace ta tb is 0.5 delta_ab "
<< std::endl;
for (int a = 0; a < AlgebraDimension; a++) {
for (int b = 0; b < AlgebraDimension; b++) {
generator(a, ta);
generator(b, tb);
Complex tr = TensorRemove(trace(ta * tb));
std::cout << GridLogMessage << "(" << a << "," << b << ") = " << tr
<< std::endl;
if (a == b) assert(abs(tr - Complex(0.5)) < 1.0e-6);
if (a != b) assert(abs(tr) < 1.0e-6);
}
}
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage << "Fundamental - Checking if hermitian"
<< std::endl;
for (int a = 0; a < AlgebraDimension; a++) {
generator(a, ta);
std::cout << GridLogMessage << a << std::endl;
assert(norm2(ta - adj(ta)) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage << "Fundamental - Checking if traceless"
<< std::endl;
for (int a = 0; a < AlgebraDimension; a++) {
generator(a, ta);
Complex tr = TensorRemove(trace(ta));
std::cout << GridLogMessage << a << std::endl;
assert(abs(tr) < 1.0e-6);
}
}
template <int N>
static Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > >
ProjectOnGeneralGroup(const Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu, GroupName::Sp) {
return ProjectOnSpGroup(Umu);
}
template <class vtype>
accelerator_inline static iScalar<vtype> ProjectOnGeneralGroup(const iScalar<vtype> &r, GroupName::Sp) {
return ProjectOnSpGroup(r);
}
template <class vtype, int N>
accelerator_inline static iVector<vtype,N> ProjectOnGeneralGroup(const iVector<vtype,N> &r, GroupName::Sp) {
return ProjectOnSpGroup(r);
}
template <class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
accelerator_inline static iMatrix<vtype,N> ProjectOnGeneralGroup(const iMatrix<vtype,N> &arg, GroupName::Sp) {
return ProjectOnSpGroup(arg);
}
template <typename LatticeMatrixType>
static void taProj(const LatticeMatrixType &in, LatticeMatrixType &out, GroupName::Sp) {
out = SpTa(in);
}
public:
template <ONLY_IF_Sp>
static void Omega(LatticeColourMatrixD &in) {
const int nsp=ncolour/2;
LatticeColourMatrixD OmegaLatt(in.Grid());
LatticeColourMatrixD identity(in.Grid());
ColourMatrix Omega;
OmegaLatt = Zero();
Omega = Zero();
identity = 1.;
for (int i = 0; i < nsp; i++) {
Omega()()(i, nsp + i) = 1.;
Omega()()(nsp + i, i) = -1;
}
OmegaLatt = OmegaLatt + (identity * Omega);
in = OmegaLatt;
}
template <ONLY_IF_Sp, class vtype, int N>
static void Omega(iScalar<iScalar<iMatrix<vtype, N> > > &in) {
const int nsp=ncolour/2;
iScalar<iScalar<iMatrix<vtype, N> > > Omega;
Omega = Zero();
for (int i = 0; i < nsp; i++) {
Omega()()(i, nsp + i) = 1.;
Omega()()(nsp + i, i) = -1;
}
in = Omega;
}

View File

@@ -8,9 +8,9 @@
#include <Grid/qcd/utils/ScalarObjs.h>
// Include representations
#include <Grid/qcd/utils/SUn.h>
#include <Grid/qcd/utils/GaugeGroup.h>
#include <Grid/qcd/utils/SUnAdjoint.h>
#include <Grid/qcd/utils/SUnTwoIndex.h>
#include <Grid/qcd/utils/GaugeGroupTwoIndex.h>
// All-to-all contraction kernels that touch the
// internal lattice structure

View File

@@ -290,7 +290,7 @@ public:
}
*/
//////////////////////////////////////////////////
// the sum over all staples on each site
// the sum over all nu-oriented staples for nu != mu on each site
//////////////////////////////////////////////////
static void Staple(GaugeMat &staple, const GaugeLorentz &Umu, int mu) {
@@ -300,6 +300,10 @@ public:
for (int d = 0; d < Nd; d++) {
U[d] = PeekIndex<LorentzIndex>(Umu, d);
}
Staple(staple, U, mu);
}
static void Staple(GaugeMat &staple, const std::vector<GaugeMat> &U, int mu) {
staple = Zero();
for (int nu = 0; nu < Nd; nu++) {
@@ -335,6 +339,203 @@ public:
}
}
/////////////
//Staples for each direction mu, summed over nu != mu
//staple: output staples for each mu (Nd)
//U: link array (Nd)
/////////////
static void StapleAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U) {
assert(staple.size() == Nd); assert(U.size() == Nd);
for(int mu=0;mu<Nd;mu++) Staple(staple[mu], U, mu);
}
//A workspace class allowing reuse of the stencil
class WilsonLoopPaddedStencilWorkspace{
std::unique_ptr<GeneralLocalStencil> stencil;
size_t nshift;
void generateStencil(GridBase* padded_grid){
double t0 = usecond();
//Generate shift arrays
std::vector<Coordinate> shifts = this->getShifts();
nshift = shifts.size();
double t1 = usecond();
//Generate local stencil
stencil.reset(new GeneralLocalStencil(padded_grid,shifts));
double t2 = usecond();
std::cout << GridLogPerformance << " WilsonLoopPaddedWorkspace timings: coord:" << (t1-t0)/1000 << "ms, stencil:" << (t2-t1)/1000 << "ms" << std::endl;
}
public:
//Get the stencil. If not already generated, or if generated using a different Grid than in PaddedCell, it will be created on-the-fly
const GeneralLocalStencil & getStencil(const PaddedCell &pcell){
assert(pcell.depth >= this->paddingDepth());
if(!stencil || stencil->Grid() != (GridBase*)pcell.grids.back() ) generateStencil((GridBase*)pcell.grids.back());
return *stencil;
}
size_t Nshift() const{ return nshift; }
virtual std::vector<Coordinate> getShifts() const = 0;
virtual int paddingDepth() const = 0; //padding depth required
virtual ~WilsonLoopPaddedStencilWorkspace(){}
};
//This workspace allows the sharing of a common PaddedCell object between multiple stencil workspaces
class WilsonLoopPaddedWorkspace{
std::vector<WilsonLoopPaddedStencilWorkspace*> stencil_wk;
std::unique_ptr<PaddedCell> pcell;
void generatePcell(GridBase* unpadded_grid){
assert(stencil_wk.size());
int max_depth = 0;
for(auto const &s : stencil_wk) max_depth=std::max(max_depth, s->paddingDepth());
pcell.reset(new PaddedCell(max_depth, dynamic_cast<GridCartesian*>(unpadded_grid)));
}
public:
//Add a stencil definition. This should be done before the first call to retrieve a stencil object.
//Takes ownership of the pointer
void addStencil(WilsonLoopPaddedStencilWorkspace *stencil){
assert(!pcell);
stencil_wk.push_back(stencil);
}
const GeneralLocalStencil & getStencil(const size_t stencil_idx, GridBase* unpadded_grid){
if(!pcell || pcell->unpadded_grid != unpadded_grid) generatePcell(unpadded_grid);
return stencil_wk[stencil_idx]->getStencil(*pcell);
}
const PaddedCell & getPaddedCell(GridBase* unpadded_grid){
if(!pcell || pcell->unpadded_grid != unpadded_grid) generatePcell(unpadded_grid);
return *pcell;
}
~WilsonLoopPaddedWorkspace(){
for(auto &s : stencil_wk) delete s;
}
};
//A workspace class allowing reuse of the stencil
class StaplePaddedAllWorkspace: public WilsonLoopPaddedStencilWorkspace{
public:
std::vector<Coordinate> getShifts() const override{
std::vector<Coordinate> shifts;
for(int mu=0;mu<Nd;mu++){
for(int nu=0;nu<Nd;nu++){
if(nu != mu){
Coordinate shift_0(Nd,0);
Coordinate shift_mu(Nd,0); shift_mu[mu]=1;
Coordinate shift_nu(Nd,0); shift_nu[nu]=1;
Coordinate shift_mnu(Nd,0); shift_mnu[nu]=-1;
Coordinate shift_mnu_pmu(Nd,0); shift_mnu_pmu[nu]=-1; shift_mnu_pmu[mu]=1;
//U_nu(x+mu)U^dag_mu(x+nu) U^dag_nu(x)
shifts.push_back(shift_0);
shifts.push_back(shift_nu);
shifts.push_back(shift_mu);
//U_nu^dag(x-nu+mu) U_mu^dag(x-nu) U_nu(x-nu)
shifts.push_back(shift_mnu);
shifts.push_back(shift_mnu);
shifts.push_back(shift_mnu_pmu);
}
}
}
return shifts;
}
int paddingDepth() const override{ return 1; }
};
//Padded cell implementation of the staple method for all mu, summed over nu != mu
//staple: output staple for each mu, summed over nu != mu (Nd)
//U_padded: the gauge link fields padded out using the PaddedCell class
//Cell: the padded cell class
static void StaplePaddedAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U_padded, const PaddedCell &Cell) {
StaplePaddedAllWorkspace wk;
StaplePaddedAll(staple,U_padded,Cell,wk.getStencil(Cell));
}
//Padded cell implementation of the staple method for all mu, summed over nu != mu
//staple: output staple for each mu, summed over nu != mu (Nd)
//U_padded: the gauge link fields padded out using the PaddedCell class
//Cell: the padded cell class
//gStencil: the precomputed generalized local stencil for the staple
static void StaplePaddedAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U_padded, const PaddedCell &Cell, const GeneralLocalStencil &gStencil)
{
double t0 = usecond();
assert(U_padded.size() == Nd); assert(staple.size() == Nd);
assert(U_padded[0].Grid() == (GridBase*)Cell.grids.back());
assert(Cell.depth >= 1);
GridBase *ggrid = U_padded[0].Grid(); //padded cell grid
int shift_mu_off = gStencil._npoints/Nd;
//Open views to padded gauge links and keep open over mu loop
typedef LatticeView<typename GaugeMat::vector_object> GaugeViewType;
size_t vsize = Nd*sizeof(GaugeViewType);
GaugeViewType* Ug_dirs_v_host = (GaugeViewType*)malloc(vsize);
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i] = U_padded[i].View(AcceleratorRead);
GaugeViewType* Ug_dirs_v = (GaugeViewType*)acceleratorAllocDevice(vsize);
acceleratorCopyToDevice(Ug_dirs_v_host,Ug_dirs_v,vsize);
GaugeMat gStaple(ggrid);
int outer_off = 0;
for(int mu=0;mu<Nd;mu++){
{ //view scope
autoView( gStaple_v , gStaple, AcceleratorWrite);
auto gStencil_v = gStencil.View();
accelerator_for(ss, ggrid->oSites(), (size_t)ggrid->Nsimd(), {
decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss;
stencil_ss = Zero();
int off = outer_off;
for(int nu=0;nu<Nd;nu++){
if(nu != mu){
GeneralStencilEntry const* e = gStencil_v.GetEntry(off++,ss);
auto U0 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(off++,ss);
auto U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(off++,ss);
auto U2 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
stencil_ss = stencil_ss + U2 * U1 * U0;
e = gStencil_v.GetEntry(off++,ss);
U0 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(off++,ss);
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(off++,ss);
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
stencil_ss = stencil_ss + U2 * U1 * U0;
}
}
coalescedWrite(gStaple_v[ss],stencil_ss);
}
);
} //ensure views are all closed!
staple[mu] = Cell.Extract(gStaple);
outer_off += shift_mu_off;
}//mu loop
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i].ViewClose();
free(Ug_dirs_v_host);
acceleratorFreeDevice(Ug_dirs_v);
double t1=usecond();
std::cout << GridLogPerformance << "StaplePaddedAll timing:" << (t1-t0)/1000 << "ms" << std::endl;
}
//////////////////////////////////////////////////
// the sum over all staples on each site in direction mu,nu, upper part
//////////////////////////////////////////////////
@@ -707,18 +908,14 @@ public:
// the sum over all staples on each site
//////////////////////////////////////////////////
static void RectStapleDouble(GaugeMat &U2, const GaugeMat &U, int mu) {
U2 = U * Cshift(U, mu, 1);
U2 = U * Gimpl::CshiftLink(U, mu, 1);
}
////////////////////////////////////////////////////////////////////////////
// Hop by two optimisation strategy does not work nicely with Gparity. (could
// do,
// but need to track two deep where cross boundary and apply a conjugation).
// Must differentiate this in Gimpl, and use Gimpl::isPeriodicGaugeField to do
// so .
// Hop by two optimisation strategy. Use RectStapleDouble to obtain 'U2'
////////////////////////////////////////////////////////////////////////////
static void RectStapleOptimised(GaugeMat &Stap, std::vector<GaugeMat> &U2,
std::vector<GaugeMat> &U, int mu) {
static void RectStapleOptimised(GaugeMat &Stap, const std::vector<GaugeMat> &U2,
const std::vector<GaugeMat> &U, int mu) {
Stap = Zero();
@@ -732,9 +929,9 @@ public:
// Up staple ___ ___
// | |
tmp = Cshift(adj(U[nu]), nu, -1);
tmp = Gimpl::CshiftLink(adj(U[nu]), nu, -1);
tmp = adj(U2[mu]) * tmp;
tmp = Cshift(tmp, mu, -2);
tmp = Gimpl::CshiftLink(tmp, mu, -2);
Staple2x1 = Gimpl::CovShiftForward(U[nu], nu, tmp);
@@ -742,14 +939,14 @@ public:
// |___ ___|
//
tmp = adj(U2[mu]) * U[nu];
Staple2x1 += Gimpl::CovShiftBackward(U[nu], nu, Cshift(tmp, mu, -2));
Staple2x1 += Gimpl::CovShiftBackward(U[nu], nu, Gimpl::CshiftLink(tmp, mu, -2));
// ___ ___
// | ___|
// |___ ___|
//
Stap += Cshift(Gimpl::CovShiftForward(U[mu], mu, Staple2x1), mu, 1);
Stap += Gimpl::CshiftLink(Gimpl::CovShiftForward(U[mu], mu, Staple2x1), mu, 1);
// ___ ___
// |___ |
@@ -758,7 +955,7 @@ public:
// tmp= Staple2x1* Cshift(U[mu],mu,-2);
// Stap+= Cshift(tmp,mu,1) ;
Stap += Cshift(Staple2x1, mu, 1) * Cshift(U[mu], mu, -1);
Stap += Gimpl::CshiftLink(Staple2x1, mu, 1) * Gimpl::CshiftLink(U[mu], mu, -1);
;
// --
@@ -766,10 +963,10 @@ public:
//
// | |
tmp = Cshift(adj(U2[nu]), nu, -2);
tmp = Gimpl::CshiftLink(adj(U2[nu]), nu, -2);
tmp = Gimpl::CovShiftBackward(U[mu], mu, tmp);
tmp = U2[nu] * Cshift(tmp, nu, 2);
Stap += Cshift(tmp, mu, 1);
tmp = U2[nu] * Gimpl::CshiftLink(tmp, nu, 2);
Stap += Gimpl::CshiftLink(tmp, mu, 1);
// | |
//
@@ -778,25 +975,12 @@ public:
tmp = Gimpl::CovShiftBackward(U[mu], mu, U2[nu]);
tmp = adj(U2[nu]) * tmp;
tmp = Cshift(tmp, nu, -2);
Stap += Cshift(tmp, mu, 1);
tmp = Gimpl::CshiftLink(tmp, nu, -2);
Stap += Gimpl::CshiftLink(tmp, mu, 1);
}
}
}
static void RectStaple(GaugeMat &Stap, const GaugeLorentz &Umu, int mu) {
RectStapleUnoptimised(Stap, Umu, mu);
}
static void RectStaple(const GaugeLorentz &Umu, GaugeMat &Stap,
std::vector<GaugeMat> &U2, std::vector<GaugeMat> &U,
int mu) {
if (Gimpl::isPeriodicGaugeField()) {
RectStapleOptimised(Stap, U2, U, mu);
} else {
RectStapleUnoptimised(Stap, Umu, mu);
}
}
static void RectStapleUnoptimised(GaugeMat &Stap, const GaugeLorentz &Umu,
int mu) {
GridBase *grid = Umu.Grid();
@@ -895,6 +1079,288 @@ public:
}
}
static void RectStaple(GaugeMat &Stap, const GaugeLorentz &Umu, int mu) {
RectStapleUnoptimised(Stap, Umu, mu);
}
static void RectStaple(const GaugeLorentz &Umu, GaugeMat &Stap,
std::vector<GaugeMat> &U2, std::vector<GaugeMat> &U,
int mu) {
RectStapleOptimised(Stap, U2, U, mu);
}
//////////////////////////////////////////////////////
//Compute the rectangular staples for all orientations
//Stap : Array of staples (Nd)
//U: Gauge links in each direction (Nd)
/////////////////////////////////////////////////////
static void RectStapleAll(std::vector<GaugeMat> &Stap, const std::vector<GaugeMat> &U){
assert(Stap.size() == Nd); assert(U.size() == Nd);
std::vector<GaugeMat> U2(Nd,U[0].Grid());
for(int mu=0;mu<Nd;mu++) RectStapleDouble(U2[mu], U[mu], mu);
for(int mu=0;mu<Nd;mu++) RectStapleOptimised(Stap[mu], U2, U, mu);
}
//A workspace class allowing reuse of the stencil
class RectStaplePaddedAllWorkspace: public WilsonLoopPaddedStencilWorkspace{
public:
std::vector<Coordinate> getShifts() const override{
std::vector<Coordinate> shifts;
for (int mu = 0; mu < Nd; mu++){
for (int nu = 0; nu < Nd; nu++) {
if (nu != mu) {
auto genShift = [&](int mushift,int nushift){
Coordinate out(Nd,0); out[mu]=mushift; out[nu]=nushift; return out;
};
//tmp6 = tmp5(x+mu) = U_mu(x+mu)U_nu(x+2mu)U_mu^dag(x+nu+mu) U_mu^dag(x+nu) U_nu^dag(x)
shifts.push_back(genShift(0,0));
shifts.push_back(genShift(0,+1));
shifts.push_back(genShift(+1,+1));
shifts.push_back(genShift(+2,0));
shifts.push_back(genShift(+1,0));
//tmp5 = tmp4(x+mu) = U_mu(x+mu)U^dag_nu(x-nu+2mu)U^dag_mu(x-nu+mu)U^dag_mu(x-nu)U_nu(x-nu)
shifts.push_back(genShift(0,-1));
shifts.push_back(genShift(0,-1));
shifts.push_back(genShift(+1,-1));
shifts.push_back(genShift(+2,-1));
shifts.push_back(genShift(+1,0));
//tmp5 = tmp4(x+mu) = U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)U_mu(x-mu)
shifts.push_back(genShift(-1,0));
shifts.push_back(genShift(-1,-1));
shifts.push_back(genShift(-1,-1));
shifts.push_back(genShift(0,-1));
shifts.push_back(genShift(+1,-1));
//tmp5 = tmp4(x+mu) = U_nu(x+mu)U_mu^dag(x+nu)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_mu(x-mu)
shifts.push_back(genShift(-1,0));
shifts.push_back(genShift(-1,0));
shifts.push_back(genShift(-1,+1));
shifts.push_back(genShift(0,+1));
shifts.push_back(genShift(+1,0));
//tmp6 = tmp5(x+mu) = U_nu(x+mu)U_nu(x+mu+nu)U_mu^dag(x+2nu)U_nu^dag(x+nu)U_nu^dag(x)
shifts.push_back(genShift(0,0));
shifts.push_back(genShift(0,+1));
shifts.push_back(genShift(0,+2));
shifts.push_back(genShift(+1,+1));
shifts.push_back(genShift(+1,0));
//tmp5 = tmp4(x+mu) = U_nu^dag(x+mu-nu)U_nu^dag(x+mu-2nu)U_mu^dag(x-2nu)U_nu(x-2nu)U_nu(x-nu)
shifts.push_back(genShift(0,-1));
shifts.push_back(genShift(0,-2));
shifts.push_back(genShift(0,-2));
shifts.push_back(genShift(+1,-2));
shifts.push_back(genShift(+1,-1));
}
}
}
return shifts;
}
int paddingDepth() const override{ return 2; }
};
//Padded cell implementation of the rectangular staple method for all mu, summed over nu != mu
//staple: output staple for each mu, summed over nu != mu (Nd)
//U_padded: the gauge link fields padded out using the PaddedCell class
//Cell: the padded cell class
static void RectStaplePaddedAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U_padded, const PaddedCell &Cell) {
RectStaplePaddedAllWorkspace wk;
RectStaplePaddedAll(staple,U_padded,Cell,wk.getStencil(Cell));
}
//Padded cell implementation of the rectangular staple method for all mu, summed over nu != mu
//staple: output staple for each mu, summed over nu != mu (Nd)
//U_padded: the gauge link fields padded out using the PaddedCell class
//Cell: the padded cell class
//gStencil: the stencil
static void RectStaplePaddedAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U_padded, const PaddedCell &Cell, const GeneralLocalStencil &gStencil) {
double t0 = usecond();
assert(U_padded.size() == Nd); assert(staple.size() == Nd);
assert(U_padded[0].Grid() == (GridBase*)Cell.grids.back());
assert(Cell.depth >= 2);
GridBase *ggrid = U_padded[0].Grid(); //padded cell grid
size_t nshift = gStencil._npoints;
int mu_off_delta = nshift / Nd;
//Open views to padded gauge links and keep open over mu loop
typedef LatticeView<typename GaugeMat::vector_object> GaugeViewType;
size_t vsize = Nd*sizeof(GaugeViewType);
GaugeViewType* Ug_dirs_v_host = (GaugeViewType*)malloc(vsize);
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i] = U_padded[i].View(AcceleratorRead);
GaugeViewType* Ug_dirs_v = (GaugeViewType*)acceleratorAllocDevice(vsize);
acceleratorCopyToDevice(Ug_dirs_v_host,Ug_dirs_v,vsize);
GaugeMat gStaple(ggrid); //temp staple object on padded grid
int offset = 0;
for(int mu=0; mu<Nd; mu++){
{ //view scope
autoView( gStaple_v , gStaple, AcceleratorWrite);
auto gStencil_v = gStencil.View();
accelerator_for(ss, ggrid->oSites(), (size_t)ggrid->Nsimd(), {
decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss;
stencil_ss = Zero();
int s=offset;
for(int nu=0;nu<Nd;nu++){
if(nu != mu){
//tmp6 = tmp5(x+mu) = U_mu(x+mu)U_nu(x+2mu)U_mu^dag(x+nu+mu) U_mu^dag(x+nu) U_nu^dag(x)
GeneralStencilEntry const* e = gStencil_v.GetEntry(s++,ss);
auto U0 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
auto U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
auto U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
auto U3 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
auto U4 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
//tmp5 = tmp4(x+mu) = U_mu(x+mu)U^dag_nu(x-nu+2mu)U^dag_mu(x-nu+mu)U^dag_mu(x-nu)U_nu(x-nu)
e = gStencil_v.GetEntry(s++,ss);
U0 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U4 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
//tmp5 = tmp4(x+mu) = U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)U_mu(x-mu)
e = gStencil_v.GetEntry(s++,ss);
U0 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U1 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U4 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
//tmp5 = tmp4(x+mu) = U_nu(x+mu)U_mu^dag(x+nu)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_mu(x-mu)
e = gStencil_v.GetEntry(s++,ss);
U0 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U4 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
//tmp6 = tmp5(x+mu) = U_nu(x+mu)U_nu(x+mu+nu)U_mu^dag(x+2nu)U_nu^dag(x+nu)U_nu^dag(x)
e = gStencil_v.GetEntry(s++,ss);
U0 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U3 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U4 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
//tmp5 = tmp4(x+mu) = U_nu^dag(x+mu-nu)U_nu^dag(x+mu-2nu)U_mu^dag(x-2nu)U_nu(x-2nu)U_nu(x-nu)
e = gStencil_v.GetEntry(s++,ss);
U0 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U1 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U4 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
}
}
coalescedWrite(gStaple_v[ss],stencil_ss);
}
);
offset += mu_off_delta;
}//kernel/view scope
staple[mu] = Cell.Extract(gStaple);
}//mu loop
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i].ViewClose();
free(Ug_dirs_v_host);
acceleratorFreeDevice(Ug_dirs_v);
double t1 = usecond();
std::cout << GridLogPerformance << "RectStaplePaddedAll timings:" << (t1-t0)/1000 << "ms" << std::endl;
}
//A workspace for reusing the PaddedCell and GeneralLocalStencil objects
class StapleAndRectStapleAllWorkspace: public WilsonLoopPaddedWorkspace{
public:
StapleAndRectStapleAllWorkspace(){
this->addStencil(new StaplePaddedAllWorkspace);
this->addStencil(new RectStaplePaddedAllWorkspace);
}
};
//////////////////////////////////////////////////////
//Compute the 1x1 and 1x2 staples for all orientations
//Stap : Array of staples (Nd)
//RectStap: Array of rectangular staples (Nd)
//U: Gauge links in each direction (Nd)
/////////////////////////////////////////////////////
static void StapleAndRectStapleAll(std::vector<GaugeMat> &Stap, std::vector<GaugeMat> &RectStap, const std::vector<GaugeMat> &U){
StapleAndRectStapleAllWorkspace wk;
StapleAndRectStapleAll(Stap,RectStap,U,wk);
}
//////////////////////////////////////////////////////
//Compute the 1x1 and 1x2 staples for all orientations
//Stap : Array of staples (Nd)
//RectStap: Array of rectangular staples (Nd)
//U: Gauge links in each direction (Nd)
//wk: a workspace containing stored PaddedCell and GeneralLocalStencil objects to maximize reuse
/////////////////////////////////////////////////////
static void StapleAndRectStapleAll(std::vector<GaugeMat> &Stap, std::vector<GaugeMat> &RectStap, const std::vector<GaugeMat> &U, StapleAndRectStapleAllWorkspace &wk){
#if 0
StapleAll(Stap, U);
RectStapleAll(RectStap, U);
#else
double t0 = usecond();
GridCartesian* unpadded_grid = dynamic_cast<GridCartesian*>(U[0].Grid());
const PaddedCell &Ghost = wk.getPaddedCell(unpadded_grid);
CshiftImplGauge<Gimpl> cshift_impl;
std::vector<GaugeMat> U_pad(Nd, Ghost.grids.back());
for(int mu=0;mu<Nd;mu++) U_pad[mu] = Ghost.Exchange(U[mu], cshift_impl);
double t1 = usecond();
StaplePaddedAll(Stap, U_pad, Ghost, wk.getStencil(0,unpadded_grid) );
double t2 = usecond();
RectStaplePaddedAll(RectStap, U_pad, Ghost, wk.getStencil(1,unpadded_grid));
double t3 = usecond();
std::cout << GridLogPerformance << "StapleAndRectStapleAll timings: pad:" << (t1-t0)/1000 << "ms, staple:" << (t2-t1)/1000 << "ms, rect-staple:" << (t3-t2)/1000 << "ms" << std::endl;
#endif
}
//////////////////////////////////////////////////
// Wilson loop of size (R1, R2), oriented in mu,nu plane
//////////////////////////////////////////////////

View File

@@ -320,7 +320,7 @@ struct Conj{
struct TimesMinusI{
//Complex single
inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
inline float32x4_t operator()(float32x4_t in){
// ar ai br bi -> ai -ar ai -br
float32x4_t r0, r1;
r0 = vnegq_f32(in); // -ar -ai -br -bi
@@ -328,7 +328,7 @@ struct TimesMinusI{
return vtrn1q_f32(r1, r0); // ar -ai br -bi
}
//Complex double
inline float64x2_t operator()(float64x2_t in, float64x2_t ret){
inline float64x2_t operator()(float64x2_t in){
// a ib -> b -ia
float64x2_t tmp;
tmp = vnegq_f64(in);
@@ -338,7 +338,7 @@ struct TimesMinusI{
struct TimesI{
//Complex single
inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
inline float32x4_t operator()(float32x4_t in){
// ar ai br bi -> -ai ar -bi br
float32x4_t r0, r1;
r0 = vnegq_f32(in); // -ar -ai -br -bi
@@ -346,7 +346,7 @@ struct TimesI{
return vtrn1q_f32(r1, in); // -ai ar -bi br
}
//Complex double
inline float64x2_t operator()(float64x2_t in, float64x2_t ret){
inline float64x2_t operator()(float64x2_t in){
// a ib -> -b ia
float64x2_t tmp;
tmp = vnegq_f64(in);

View File

@@ -218,6 +218,10 @@ public:
// -------------------------------------------------
// misc
// -------------------------------------------------
void discardhi(uint64_t z) {
_s[3] += z;
encrypt_counter();
}
// req: 26.5.1.4 Random number engine requirements, p.908 table 117, row 9
// Advances es state ei to ei+z by any means equivalent to z
@@ -387,4 +391,4 @@ private:
#undef MIXK
#undef MIX2
#endif
#endif

View File

@@ -43,7 +43,7 @@ class GeneralLocalStencilView {
int _npoints; // Move to template param?
GeneralStencilEntry* _entries_p;
accelerator_inline GeneralStencilEntry * GetEntry(int point,int osite) {
accelerator_inline GeneralStencilEntry * GetEntry(int point,int osite) const {
return & this->_entries_p[point+this->_npoints*osite];
}
@@ -79,60 +79,60 @@ public:
this->_entries.resize(npoints* osites);
this->_entries_p = &_entries[0];
thread_for(site, osites, {
Coordinate Coor;
Coordinate NbrCoor;
Coordinate Coor;
Coordinate NbrCoor;
for(Integer site=0;site<osites;site++){
for(Integer ii=0;ii<npoints;ii++){
Integer lex = site*npoints+ii;
GeneralStencilEntry SE;
////////////////////////////////////////////////
// Outer index of neighbour Offset calculation
////////////////////////////////////////////////
grid->oCoorFromOindex(Coor,site);
for(int d=0;d<Coor.size();d++){
int rd = grid->_rdimensions[d];
NbrCoor[d] = (Coor[d] + shifts[ii][d] + rd )%rd;
for(Integer ii=0;ii<npoints;ii++){
Integer lex = site*npoints+ii;
GeneralStencilEntry SE;
////////////////////////////////////////////////
// Outer index of neighbour Offset calculation
////////////////////////////////////////////////
grid->oCoorFromOindex(Coor,site);
for(int d=0;d<Coor.size();d++){
int rd = grid->_rdimensions[d];
NbrCoor[d] = (Coor[d] + shifts[ii][d] + rd )%rd;
}
SE._offset = grid->oIndexReduced(NbrCoor);
////////////////////////////////////////////////
// Inner index permute calculation
// Simpler version using icoor calculation
////////////////////////////////////////////////
SE._permute =0;
for(int d=0;d<Coor.size();d++){
int fd = grid->_fdimensions[d];
int rd = grid->_rdimensions[d];
int ly = grid->_simd_layout[d];
assert((ly==1)||(ly==2));
int shift = (shifts[ii][d]+fd)%fd; // make it strictly positive 0.. L-1
int x = Coor[d]; // x in [0... rd-1] as an oSite
int permute_dim = grid->PermuteDim(d);
int permute_slice=0;
if(permute_dim){
int num = shift%rd; // Slice within dest osite cell of slice zero
int wrap = shift/rd; // Number of osite local volume cells crossed through
// x+num < rd dictates whether we are in same permute state as slice 0
if ( x< rd-num ) permute_slice=wrap;
else permute_slice=(wrap+1)%ly;
}
if ( permute_slice ) {
int ptype =grid->PermuteType(d);
uint8_t mask =0x1<<ptype;
SE._permute |= mask;
}
}
////////////////////////////////////////////////
// Store in look up table
////////////////////////////////////////////////
this->_entries[lex] = SE;
}
SE._offset = grid->oIndexReduced(NbrCoor);
////////////////////////////////////////////////
// Inner index permute calculation
// Simpler version using icoor calculation
////////////////////////////////////////////////
SE._permute =0;
for(int d=0;d<Coor.size();d++){
int fd = grid->_fdimensions[d];
int rd = grid->_rdimensions[d];
int ly = grid->_simd_layout[d];
assert((ly==1)||(ly==2));
int shift = (shifts[ii][d]+fd)%fd; // make it strictly positive 0.. L-1
int x = Coor[d]; // x in [0... rd-1] as an oSite
int permute_dim = grid->PermuteDim(d);
int permute_slice=0;
if(permute_dim){
int num = shift%rd; // Slice within dest osite cell of slice zero
int wrap = shift/rd; // Number of osite local volume cells crossed through
// x+num < rd dictates whether we are in same permute state as slice 0
if ( x< rd-num ) permute_slice=wrap;
else permute_slice=(wrap+1)%ly;
}
if ( permute_slice ) {
int ptype =grid->PermuteType(d);
uint8_t mask =grid->Nsimd() >> (ptype + 1);
SE._permute |= mask;
}
}
////////////////////////////////////////////////
// Store in look up table
////////////////////////////////////////////////
this->_entries[lex] = SE;
}
}
});
}
};

Some files were not shown because too many files have changed in this diff Show More