mirror of https://github.com/paboyle/Grid.git synced 2025-06-21 17:22:03 +01:00

Compare commits


322 Commits

Author SHA1 Message Date
351795ac3a Better messaging 2023-10-20 19:33:04 -04:00
9c9c42d0df Tests on Frontier with real speed up: 3.5x on 16^3 at mq=0.01 2023-10-20 19:27:13 -04:00
b6ad1bafc7 Normal memory SendToRecvFrom asynchronous for use in general stencil
code
2023-10-20 19:27:13 -04:00
a5ca40f446 Better verbose -- track CPU GPU motion under --log Memory, others go to
debug output stream
2023-10-20 19:27:13 -04:00
9ab54c5565 Overlap comms & data copy/buffer assembly in Ghost zone exchange 2023-10-20 19:27:13 -04:00
4341d96bde Massively sped up coarse grid mult, comms
Save 3ms spent (60% of time!) on cudaMalloc!!
2023-10-20 19:27:13 -04:00
5fac47a26d Faster halo exchange 2023-10-20 19:27:13 -04:00
e064f17346 Faster halo exchange 2023-10-20 19:27:13 -04:00
afe10ba2a2 More digits 2023-10-20 19:27:13 -04:00
7cc3435ba8 Improved General coarsened matrix 2023-10-20 19:27:13 -04:00
541772313c Verbosity 2023-10-20 19:27:13 -04:00
3747494a09 Notify delete public 2023-10-20 19:27:13 -04:00
f2b98d0dcc Const safety 2023-10-20 19:27:13 -04:00
80471bf762 Alternate implementation involving face operations 2023-10-20 19:27:13 -04:00
a06f63c110 Improved I/O and non-lexico option exposed to SciDAC format 2023-10-20 19:27:13 -04:00
0ae4478cd9 Checkpoint the subspace and ldop 2023-10-20 19:27:13 -04:00
ae4e705e09 Use random vec as easier for debug 2023-10-20 19:27:13 -04:00
f5dcea9dbf Updates for Frontier 2023-10-20 19:27:12 -04:00
2207309f8a Spack rules 2023-10-16 18:38:24 -04:00
2111e7ab5f Run at physical mass 2023-10-06 21:20:21 -04:00
d29abfdcaf Transfer code to Frontier now 2023-10-06 21:03:34 -04:00
a751c42cc5 Checkpoint restore the setup 2023-10-06 21:03:08 -04:00
6a3bc9865e Verbose change 2023-10-06 21:02:04 -04:00
4d5f7e4377 Verbose change 2023-10-06 21:01:37 -04:00
78b117fb78 Comment fix 2023-10-06 21:01:15 -04:00
ded63a1319 Verbose change/pretty print 2023-10-06 21:00:53 -04:00
df3e4d1e9c Return fix 2023-10-06 21:00:21 -04:00
b58fd80379 I/O for coarse op and reorganise multigrid headers 2023-10-06 13:43:46 -04:00
7f6e0f57d0 No IO in file 2023-10-06 13:39:53 -04:00
cae27678d8 gpermute 2023-10-06 13:39:19 -04:00
48ff655bad Slightly less verbose 2023-10-06 10:47:52 -04:00
2525ad4623 Slight clean up 2023-10-06 10:47:32 -04:00
e7020017c5 Reorganise multigrid 2023-10-06 10:47:12 -04:00
eacebfad74 Reorganise multigrid into multiple headers 2023-10-06 10:46:21 -04:00
3bc2da5321 Merge branch 'feature/scidac-wp1' of https://github.com/paboyle/Grid into feature/scidac-wp1 2023-10-05 16:57:59 -04:00
2d710d6bfd Optimised parameters for 16^3 2023-10-05 16:56:55 -04:00
6532b7f32b Eliminate older inefficient coarsening implementation 2023-10-05 16:56:15 -04:00
7b41b92d99 Only need to pad non-local dimensions 2023-10-05 16:55:48 -04:00
dd557af84b ADEF1 and ADEF2 2 level CG 2023-10-05 16:55:19 -04:00
59b9d0e030 coalesceRead the blockSum 2023-10-05 16:54:48 -04:00
b82eee4733 Dealing with Hermitian 2023-10-05 16:54:14 -04:00
6a87487544 Running on Frontier, fix RNG big volume y2k, affecting 5D RNG 2023-10-05 16:50:59 -04:00
fcf5023845 Running on Frontier 2023-10-05 16:50:59 -04:00
c8adad6d8b First runs on Summit. PopulateAdag needs work 2023-10-05 16:50:54 -04:00
737d3ffb98 ADEF1 and 1 hop projection 2023-10-03 14:22:18 -04:00
b01e67bab1 coalescedReadGeneralPermute now working 2023-10-02 17:46:57 -04:00
8a70314f54 Merge branch 'develop' into feature/scidac-wp1 2023-10-02 17:24:55 -04:00
afc316f501 Rename headers 2023-10-02 16:25:11 -04:00
f14bfd5c1b Relocate sub includes 2023-10-02 16:23:38 -04:00
c5f1420dea Merge remote-tracking branch 'LupoA/develop' into LupoA-develop 2023-10-02 16:22:35 -04:00
018e6da872 Merge pull request #440 from giltirn/feature/paddedcellgauge
Feature/paddedcellgauge
2023-10-02 10:00:42 -04:00
b77bccfac2 Merge pull request #444 from mmphys/feature/docX
Update doc complete list of Macports needed to build Grid on a fresh Mac
2023-10-02 09:57:11 -04:00
36ae6e5aba Fastest GPU version.
Need to work on the PaddedCell now to make much faster
2023-09-29 18:26:51 -04:00
9db585cfeb Temporary commit while optimisation is carried out 2023-09-29 17:11:35 -04:00
c564611ba7 Annoying hack that is useful to preserve for profiling 2023-09-29 17:11:12 -04:00
e187bcb85c Updating 2023-09-29 17:10:17 -04:00
be18ffe3b4 Further tuning and lanczos 2023-09-27 16:21:58 -04:00
0d63dce4e2 Timing info 2023-09-27 16:21:14 -04:00
26b30e1551 Flop count and projection to nearest neighbour (keeps redundant flops) 2023-09-27 16:20:11 -04:00
7fc58ac293 Verbose subspace init 2023-09-27 16:19:45 -04:00
3a86cce8c1 Compile 2023-09-27 16:19:18 -04:00
80359e0d49 Bland SYCL compile 2023-09-26 13:20:27 -07:00
3d437c5cc4 Making SYCL happy 2023-09-26 13:19:42 -07:00
37884d369f Coarse space is expensive, but gives a speed up in fine matrix multiplies now.
Down to optimisation
2023-09-25 17:24:19 -04:00
9246e653cd Basic non-local coarsening of operator test 2023-09-25 17:20:58 -04:00
64283c8673 Normal equations becomes linear function for easy base class pass around 2023-09-25 17:19:39 -04:00
755002da9c Comparison convenience 2023-09-25 17:16:33 -04:00
31b8e8b437 Better messaging 2023-09-25 17:16:14 -04:00
0ec0de97e6 Adef2 implemented and working in an HDCG like context 2023-09-25 17:15:03 -04:00
6c3ade5d89 Improved the coarsening 2023-09-25 17:14:40 -04:00
980c5f9a34 Update chebyshev setup 2023-09-25 17:12:22 -04:00
471ca5f281 Power method more iterations 2023-09-07 10:55:05 -04:00
e82ddcff5d Working, getting closer to HDCG, but some low-level engineering work still needed
+ MUCH work on optimisation
2023-09-07 10:53:51 -04:00
b9dcad89e8 Test cases for coarsening with non-local stencil 2023-09-07 10:53:22 -04:00
993f43ef4a Even odd use case 2023-09-07 10:53:06 -04:00
2b43308208 First cut non-local coarsening 2023-08-25 17:38:07 -04:00
04a1ac3a76 First cut for non-local coarsening 2023-08-25 17:37:38 -04:00
990b8798bd Merge remote-tracking branch 'refs/remotes/origin/develop' into develop 2023-08-25 17:36:45 -04:00
b334a73a44 Stencil improvement 2023-08-25 17:35:10 -04:00
5d113d1c70 Odd address sanitizer complain 2023-08-25 17:34:18 -04:00
c14977aeab Random vector option for test purposes 2023-08-25 17:33:31 -04:00
3e94838204 Spread out improvement 2023-08-25 17:31:28 -04:00
c0a0b8ca62 NEON and address sanitiser 2023-08-25 17:30:30 -04:00
b8a7004365 Partial fraction test 2023-08-14 15:17:03 -04:00
bd56c95a6f Update documentation with complete list of Macports needed to build Grid on a fresh Mac 2023-07-14 13:50:06 +01:00
994512048e Merge pull request #439 from felixerben/bugfix/IRL_convergence
Bugfix/irl convergence
2023-07-12 16:32:26 -04:00
dbd8bb49dc Merge pull request #32 from LupoA/sp2n/develop
Sp2n/develop
2023-07-04 15:23:43 +00:00
3a29af0ce4 Fixed linker error 2023-07-04 16:08:44 +01:00
f7b79cdd45 Added test for ProjectSpn 2023-07-03 18:00:32 +01:00
075b9d22d0 adjoint rep implemented as 2indx symmetric 2023-07-02 13:58:31 +01:00
b92428f05f better test 2023-07-02 13:34:03 +01:00
34b11864b6 prettiest tests 2023-07-02 13:25:57 +01:00
1dfaa08afb The stencils for the staple and rect-staple padded cell implementations are now created and stored by workspace classes that allow for reuse providing the grids remain consistent
The workspaces are now used by the plaq+rectangle gauge action resulting in a further 2x performance improvement as measured on a 16^4 local volume for 2 nodes (16 ranks) of Crusher
2023-06-28 15:11:24 -04:00
f44dce390f Implemented accelerator-optimized versions of localCopyRegion and insertSliceLocal to speed up padding
Fixed const correctness on PaddedCell methods
Fixed compile issues on Crusher
Added timing breakdowns for PaddedCell::Expand and the padded implementations of the staples, visible under --log Performance
Optimized kernel for StaplePadded
Test_iwasaki_action_newstaple now repeats the calculation 10 times and reports average timings
2023-06-27 14:58:10 -04:00
bb71e9a96a Added PaddedCell and GeneralisedLocalStencil header includes to standard base headers
Moved versions of the padded-cell implementations of staple and rect-staple from test code to WilsonLoops header
Added StapleAndRectStapleAll which is now called by the plaq+rectangle action class. Under the hood it uses the padded cell implementations with maximal reuse of the padded gauge links
2023-06-27 11:23:30 -04:00
78bae9417c returning Nstop vectors even if not all meet true convergence criterion 2023-06-27 14:38:19 +01:00
dd170ead01 whitespace 2023-06-27 11:37:01 +01:00
014704856f do one more iteration if not all vectors converged 2023-06-27 11:33:30 +01:00
6f6844ccf1 Added new StapleAll and RectStapleAll functions that return the staples for all mu as an array
Modified plaq+rectangle gauge actions to use the above
Added a test code to confirm the above changes
2023-06-26 15:48:47 -04:00
4c6613d72c Modified RectStapleDouble and RectStapleOptimised to use Gauge-BC respecting CshiftLink
Added test code tests/debug/Test_optimized_staple_gaugebc demonstrating equivalence of above to RectStapleUnoptimised for cconj gauge BCs
Removed optimized staple only being used for periodic gauge BCs; it is now always used
2023-06-26 10:20:23 -04:00
ee92e08edb Merge pull request #435 from fjosw/fix/warnings_in_WilsonKernelsImplementation
Unused variable in WilsonKernelsImplementation
2023-06-23 11:47:19 -04:00
c1dcee9328 Merge pull request #437 from fjosw/fix/stencil_debug
Added GridLogDebug to BuildSurfaceList debug message
2023-06-23 11:47:00 -04:00
559257bbe9 better documentation and filelist names 2023-06-23 16:16:48 +01:00
6b150961fe Better script 2023-06-23 18:09:25 +03:00
cff1f8d3b8 rm unused variables and formatting 2023-06-23 16:04:18 +01:00
f27d2083cd adjustments in SUn and Sp2n impl 2023-06-23 15:34:08 +01:00
36cc9c524f Threaded the constructor of GeneralLocalStencil 2023-06-23 09:57:38 -04:00
2822487450 rm unnecessary line 2023-06-23 14:55:23 +01:00
e07fafe46a minor adjustments to twoindex 2023-06-23 12:18:04 +01:00
063d290bd8 missing function 2023-06-23 11:11:20 +01:00
4e6194d92a Avoid code duplication in ProjectSUn 2023-06-23 11:03:50 +01:00
de30c4e22a minor improvements 2023-06-23 10:49:41 +01:00
5bafcaedfa Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-06-22 19:59:45 +03:00
bfeceae708 FTHMC 2023-06-22 12:58:18 -04:00
eacb66591f Config command 2023-06-22 19:56:40 +03:00
fadaa85626 Update 2023-06-22 19:56:27 +03:00
02a5b0d786 Updating run during testing 2023-06-22 19:52:46 +03:00
0e2141442a Dennis says broken 2023-06-22 19:19:51 +03:00
769eb0eecb Precision coverage 2023-06-22 19:19:20 +03:00
4241c7d4a3 Imported coalescedReadGeneralPermute GPU implementation from Christoph
Fixed bug in padded staple code where extract was being called on the result before the GPU view was closed
Fixed compile issue with pointer cast in padded staple code
Added timing summaries of padded staple code and timing breakdown of staple implementation to Test_padded_cell_staple
2023-06-21 16:01:01 -04:00
7b11075102 The user can now specify the implementation of Cshift used by the PaddedCell class through a virtual base class API. Implementations for default (regular Cshift) and for gauge links (which respects the gauge BCs)
Fixed const-correctness for PaddedCell and ConjugateGimpl::setDirections
Modified test code for padded-cell implementation of staple, rect-staple to use cconj BCs
2023-06-20 17:09:56 -04:00
abc658dca5 Added coalescedReadGeneralPermute CPU implementation based on Christoph's GPT code
In a test code, implemented a padded-cell version of the staple and rectangular-staple calculation
2023-06-20 16:14:25 -04:00
2372275b2c Merge pull request #36 from LupoA/sp2n/gpu-bugfix
Sp2n/gpu bugfix [close #30]
2023-06-20 13:46:00 +01:00
ef736e8aa4 Merge pull request #35 from LupoA/sp2n/enableSp
consistent enable sp config flag
2023-06-20 10:41:09 +00:00
5e539e2d54 Forgot some follow-ups on changed signature 2023-06-18 12:37:51 +01:00
96773f5254 Apparently forgot to remove one Lattice version 2023-06-18 12:21:39 +01:00
d80df09f3b consistent enable sp config flag 2023-06-16 19:16:46 +01:00
621e612c30 Fix non-zero ret on device bug 2023-06-16 16:27:49 +01:00
8c3792721b ClangFormat 2023-06-16 15:58:23 +01:00
c95bbd3948 Remove accelerated lattice version 2023-06-16 15:50:26 +01:00
e28ab7a732 Re-included instantiations for symmetric 2Index AS Sp 2023-06-16 14:20:37 +01:00
c797cbe737 deal with post-merge trauma 2023-06-16 14:20:37 +01:00
e09dfbf1c2 definitely the right merge upstream/develop 2023-06-16 14:19:46 +01:00
85e35c4da1 fix: added GridLogDebug to BuildSurfaceList debug message. 2023-06-16 10:31:16 +01:00
d72e914cf0 Profiling temporary code until optimised 2023-06-15 10:43:04 -04:00
3b5254e2d5 Optional checkpoint smeared configs for FTHMC 2023-06-15 10:43:04 -04:00
f1c358b596 Additional tests 2023-06-15 10:43:04 -04:00
c0ef210265 Hot start should be properly Hot 2023-06-15 10:43:04 -04:00
e3e1cc1962 Ta project 2023-06-15 10:43:04 -04:00
723eadbb5c Keep methods virtual 2023-06-15 10:43:04 -04:00
e24637ec1e Clean up 2023-06-15 10:43:04 -04:00
8b01ff4ce7 Integrator over to smeared force structure 2023-06-15 10:43:04 -04:00
588197c487 Smeared action virtual class 2023-06-15 10:43:04 -04:00
116d90b0ee First attempt on #30 2023-06-15 15:09:37 +01:00
b0646ca187 Remove some unused variables 2023-06-15 15:09:09 +01:00
1352bad2e4 Sunspot compile 2023-06-15 11:22:46 +00:00
4895ff260e Merge pull request #28 from LupoA/sp2n/config
compile sp2n fermion impl only if declared at config time
2023-06-09 13:07:48 +00:00
470d93006a compile sp2n fermion impl only if declared at config time 2023-06-07 12:53:33 +01:00
2f3d03f188 Merge pull request #27 from LupoA/sp2n/documentation
documentation for gaugegroup and sp2n
2023-06-01 16:42:27 +00:00
8db7c23bee improve documentation 2023-06-01 17:39:10 +01:00
69dc5172dc Merge pull request #26 from LupoA/sp2n/irreps
Sp2n/irreps
2023-06-01 16:28:15 +00:00
fd72eb6546 Merge branch 'sp2n/algorithm' into sp2n/irreps 2023-06-01 17:24:01 +01:00
ffd7301649 Updated masked / fthmc smeared config container 2023-06-01 06:23:02 -04:00
d2a8494044 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-06-01 06:22:33 -04:00
0982e0d19b Jacobian action wrapper for FTHMC 2023-06-01 06:15:08 -04:00
3badbfc3c1 Refactor the Action and Smeared gauge configuration containers. Add first pass at FTHMC action 2023-06-01 06:14:28 -04:00
5465961e30 New test for FTHMC portion 2023-06-01 06:14:04 -04:00
477b794bc5 fix: unused variable removed. 2023-05-29 14:08:53 +01:00
4835fd1a87 HIP stream synch 2023-05-27 17:58:22 +03:00
6533c25814 Lumi 2023-05-27 16:13:32 +03:00
b405767569 make private methods private 2023-05-26 17:02:16 +01:00
fe88a0c12f cleaner twoindex class, cleaner tests 2023-05-26 16:55:30 +01:00
e61a9ed2b4 partial revert 2023-05-26 13:54:26 +01:00
de8daa3824 group is SUn by default 2023-05-26 13:44:41 +01:00
3a50fb29cb directly call sp helper 2023-05-26 13:28:47 +01:00
6647d2656f rm unnecessary specialisation 2023-05-26 12:27:22 +01:00
a6f4dbeb6d remove redundant template parameter 2023-05-26 12:13:40 +01:00
92a282f2d8 Merge pull request #24 from LupoA/sp2n/fix_static_assert_symmetric
Move static_assert inside of function
2023-05-26 11:13:50 +01:00
ca2fd9fc7b documentation for gaugegroup and sp2n 2023-05-25 18:40:54 +01:00
be1a4f5860 implement TwoIndexSymm for sp2n 2023-05-22 17:21:03 +01:00
1b2914ec09 FT-HMC smearing, derivative chain rule, log det and force first pass. 2023-05-22 10:21:37 -04:00
519f795066 Header not liked by gcc on mac? puzzling 2023-05-22 10:21:12 -04:00
5897b93dd4 debug tests, fix dimension 2023-05-22 13:42:21 +01:00
af091e0881 DimensionHelper for 2index irreps 2023-05-21 16:56:06 +01:00
3c1e5e9517 Merge pull request #25 from LupoA/sp2n/unify_representations
Sp2n/unify representations [close #3]
2023-05-21 14:55:27 +01:00
85b2cb7a8a changing some hardcoded SUn lines 2023-05-21 14:50:28 +01:00
4240ad5ca8 Preparing for FTHMC 2023-05-19 21:21:55 -04:00
d418347d86 public for convenience to see rho params 2023-05-19 21:21:05 -04:00
29a4bfe5e5 Clean up 2023-05-19 21:20:45 -04:00
9955bf9daf Regresses to Qlat 2023-05-19 17:32:13 -04:00
b8bdc2eefb Unified two index representations 2023-05-18 18:36:29 +01:00
0078826ff1 Move static_assert inside of function 2023-05-18 18:14:53 +01:00
e855c41772 Unified spfundamental.h with fundamental.h 2023-05-18 18:11:20 +01:00
d169c275b6 Merge pull request #22 from LupoA/sp2n/unify_twoindex
Unify TwoIndex
2023-05-18 14:55:02 +00:00
a5125e23f4 Typo 2023-05-18 15:41:35 +01:00
7b83c80757 Merge branch 'sp2n/unify_twoindex' of github.com:LupoA/Grid into sp2n/unify_twoindex 2023-05-18 15:36:14 +01:00
e41821e206 Disable two index symmetric 2023-05-18 15:29:55 +01:00
5a75ab15a2 typo in 2S dim 2023-05-17 20:47:57 +01:00
932c783fbf 2AS for every Nc! 2023-05-17 20:22:05 +01:00
55f9cce577 Revert "Added automated HMC test for Nc=4"
This reverts commit eee27b8b30.
2023-05-17 09:17:48 +01:00
b3533ca847 correct tests (failing) 2023-05-16 17:43:52 +01:00
fd2a637010 test 2index 2023-05-16 14:10:39 +01:00
eee27b8b30 Added automated HMC test for Nc=4 2023-05-15 18:37:33 +01:00
8522352aa3 ClangFormat 2023-05-15 18:36:05 +01:00
3beb8f4091 fixing typo, getting pre-changes physics 2023-05-15 16:00:15 +01:00
12a706e9b1 de-hardcode the number of generators 2023-05-15 15:48:21 +01:00
170aa7df01 fix (dimension to be improved) 2023-05-15 15:20:18 +01:00
e8ad1fef53 Unify TwoIndex 2023-05-12 14:35:50 +01:00
876c8f4478 Nodes on padded cell 2023-05-11 12:35:49 -04:00
9c8750f261 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-05-11 12:29:09 -04:00
91efd08179 Option for Qlat generator basis 2023-05-11 12:27:45 -04:00
9953511b65 Mac compile 2023-05-11 12:27:29 -04:00
025fa9991a For FTHMC 2023-05-11 12:26:14 -04:00
e8c60c355b Padded cell code 2023-05-11 12:25:50 -04:00
6c9c7f9d85 Permute fix 2023-05-11 12:24:21 -04:00
f534523ede Debug 2023-05-11 12:23:11 -04:00
1b8a834beb Debug 2023-05-11 12:22:24 -04:00
aa9df63a05 rename group projections based on determinants 2023-05-10 14:50:52 +01:00
3953312a93 Merge pull request #20 from LupoA/sp2n/unify_gaugeimpltypes
Sp2n/unify gaugeimpltypes
2023-05-03 15:17:10 +00:00
6e62f4f616 ClangFormat 2023-05-03 16:15:12 +01:00
6a7bdca53b Take over additional algebra tests from Alessandro 2023-05-03 16:02:02 +01:00
c7fba9aace Take over additional group tests from Alessandro 2023-05-03 16:01:48 +01:00
ac6c7cb8d6 Merge in Alessandro's changes [test fails] 2023-05-03 02:53:03 +01:00
c5924833a1 ClangFormat 2023-05-03 02:39:36 +01:00
ac0a74be0d Taken care of algebra tests 2023-05-03 02:32:42 +01:00
42b0e1125d Naming and argument types 2023-05-03 01:51:46 +01:00
339c4fda79 Extracted is_element_of Sp2n 2023-05-02 15:44:34 +01:00
9b85bf9402 better projection test 2023-05-02 15:42:20 +01:00
86b02c3cd8 cleaning up requested by Julian 2023-05-02 13:31:17 +01:00
7b3b7093fa cleaning up requested by Ed 2023-05-02 12:50:57 +01:00
881b08a465 Correct implementation of SpTa 2023-04-27 18:17:06 +01:00
3ee5444c69 Remove commented out stuff 2023-04-21 08:08:18 +01:00
5e28fe56d2 Remove code duplication: Iterating through vectors 2023-04-21 08:08:06 +01:00
3aa43e6065 Debug info 2023-04-20 14:21:13 -04:00
78ac4044ff HMC 2023-04-20 13:28:07 -04:00
119c3db47f Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-04-18 15:13:16 -04:00
21bbdb8fc2 Crusher 2023-04-18 15:11:16 -04:00
5aabe074fe Rename Sympl* to Sp* 2023-04-18 11:50:20 +01:00
739bd7572c Example code 2023-04-17 21:51:55 +00:00
074627a5bd Pass file descriptors through AF_UNIX for level_zero 2023-04-17 21:50:52 +00:00
6a23b2c599 Drop UVM 2023-04-17 21:49:58 +00:00
dace904c10 fix typo 2023-04-14 18:06:18 +01:00
be98d26610 small change I missed in previous commit 2023-04-13 17:48:43 +01:00
bd891fb3f5 tests to compile 2023-04-12 18:32:44 -04:00
3984265851 Merge pull request #432 from paboyle/hotfix/nvcc-warnings
Unused statements generating warnings removed
2023-04-12 16:59:02 -04:00
45361d188f Merge pull request #427 from fjosw/feat/bug_report_issue_template
Feat/bug report issue template
2023-04-12 16:58:41 -04:00
80c9d77e02 Merge pull request #433 from paboyle/hotfix/virtual-dtor
Virtual destructor for LinearOperator
2023-04-12 16:56:18 -04:00
3aff64dddb Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-04-11 12:19:15 -07:00
b4f2ca81ff Copy queue and compute queue same as better concurrency 2023-04-11 12:18:21 -07:00
d1dea5f840 New driver 2023-04-11 12:16:52 -07:00
54f8b84d16 Fence 2023-04-11 12:16:08 -07:00
da503fef0e Name change on barrier routine 2023-04-11 12:14:04 -07:00
4a6802098a Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-04-07 15:43:28 -04:00
f9b41a84d2 Trajectory runs to completion on Crusher within wall clock time 2023-04-07 15:42:45 -04:00
9e64387933 mores unused statements removed 2023-04-07 14:27:18 +01:00
983b681d46 unused statement cleaning 2023-04-07 14:12:02 +01:00
178376f24b minor stylistic changes 2023-04-06 12:08:17 +01:00
6a0eb466ee Merge pull request #19 from LupoA/refactoring_sp2n
refactoring sp2n
2023-04-05 10:50:58 +00:00
4ea29b8f0f Template group into GaugeImplTypes. Closing #2 2023-04-04 17:49:28 +01:00
778291230a expand ProjectOnGaugeGroup, change ProjectOnSp2nAlgebra into SpTa, fixing some of its issues 2023-04-04 17:48:13 +01:00
86dac5ff4f Better printing 2023-04-04 07:42:19 -07:00
4a382fad3f Use distinct SYCL queue for copies 2023-04-04 07:41:41 -07:00
cc753670d9 Barrier elimination, surface list build 2023-04-04 07:39:14 -07:00
cc9d88ea1c Fence changes and EXT kernel loop cout reduction 2023-04-04 07:37:23 -07:00
b281b0166e Put the barrier in the subroutine 2023-04-04 07:36:03 -07:00
6a21f694ff Apply barrier in Gather kernel sequence.
Could place before comms, or in Gather, but decided to insist Gather means Gather is done
2023-04-04 07:33:24 -07:00
026e736dfa Projection on algebra can now be templated. Fix #12 2023-04-03 16:31:19 +01:00
4275b3f431 Fix typo and remove unnecessary lines 2023-04-03 12:01:52 +01:00
39214702f6 feat: indentation fixed. 2023-03-28 16:30:34 +02:00
3e4614c63a feat: draft for bug-report issue template added. 2023-03-28 16:24:35 +02:00
1b8176e2c0 fix code duplication 2023-03-17 14:58:00 +00:00
cbc053c3db Revert "projection on Sp2n algebra, to be used instead of Ta"
This reverts commit ba7f9d7b70.
2023-03-17 11:36:58 +00:00
cdf3f6ef6e Merge branch 'refactoring_sp2n' of https://github.com/LupoA/Grid into refactoring_sp2n 2023-03-15 15:59:50 +00:00
ba7f9d7b70 projection on Sp2n algebra, to be used instead of Ta 2023-03-15 15:55:12 +00:00
371fd123fb consequence of iSUnMatrix being no longer a member of the SU class 2023-03-14 10:47:07 +00:00
d6ff644aab Towards the day all tests compile 2023-03-14 10:43:25 +00:00
29586f6b5e Deactivate some tests for Nc!=3 2023-03-13 08:17:14 +00:00
fd057c838f add ProjectOnGaugeGroup and ProjectGn to allow future templating in GaugeImplTypes 2023-03-10 12:10:46 +00:00
f51222086c Move functions from GaugeGroup to group specific implementations 2023-03-09 16:22:20 +00:00
f73691ec47 Merge pull request #18 from nickforce989/sp2n/newbranch
Sp2n/newbranch
2023-02-13 10:22:27 +01:00
ccd21f96ff Plaquette agreeing and moving to final form (slowly); need to optimise 2023-02-01 22:57:44 -05:00
4b90cb8888 First cut passes combining padded cell with general stencil towards fast plaquette and staggered force 2023-02-01 22:14:10 -05:00
7ebda3e9ec Merge commit 'b10e1b7bc8bec809f874e9e48a3ccc7b2619c9d1' into sp2n/newbranch 2023-01-19 12:10:18 +00:00
b10e1b7bc8 Fixed files giving zero force computation on GPU, issue #8 2023-01-18 18:04:47 +00:00
d7dea44ce7 Merge pull request #17 from chillenzer/unify_gauge_groups
Fix compilation error in nvcc (closes #15)
2022-12-19 16:24:03 +00:00
37b6b82869 Fix file extensions 2022-12-18 16:12:56 +00:00
92ad5b8f74 Compiler error fix: NVCC requires names for templ. par. 2022-12-18 15:50:19 +00:00
8c80f1c168 Merge pull request #14 from chillenzer/unify_gauge_groups
Unify gauge groups (closes #5)
2022-12-01 17:35:46 +00:00
0af7d5a793 Rename Grid/qcd/utils/<Group>_impl.h -> Grid/qcd/utils/<Group>.h 2022-11-30 17:12:00 +00:00
505fa49983 Renamed SUn.h -> GaugeGroup.h 2022-11-30 17:09:48 +00:00
7bcf33def9 Removed Sp2n.h 2022-11-30 16:59:46 +00:00
a13820656a Removed iSUnMatrix, etc. 2022-11-30 15:09:03 +00:00
fa71b46a41 Hide nsp 2022-11-30 14:44:23 +00:00
b8b3ae6ac1 Make helper functions private 2022-11-30 13:29:14 +00:00
55c008da21 Removed forward declaration 2022-11-30 13:12:21 +00:00
2507606bd0 With function overloading (still dirty). 2022-11-30 12:54:36 +00:00
7c2ad4f8c8 Attempt with SFINAE (failed) 2022-11-30 11:57:39 +00:00
54c8025aad Remove unnecessary pwd in scripts/filelist 2022-11-28 17:50:38 +00:00
921e23e83c Separated out everything SU specific 2022-11-28 17:47:50 +00:00
6e750ecb0e Remove apparently forgotten file 2022-11-28 16:33:46 +00:00
b8f1f5d2a3 Introduce GaugeGroup 2022-11-25 17:45:32 +00:00
9273f2937c Autoformat google style 2022-11-25 17:44:08 +00:00
1aa28b47ae Add existing test to check 2022-11-25 17:40:40 +00:00
629cb2987a Fix typo in Makefile.am 2022-11-25 17:40:21 +00:00
03235d6368 Fixed typo in configure.ac 2022-11-25 16:57:40 +00:00
22064c7e4c Fixing #11 2022-11-25 13:10:29 +00:00
2de03e5172 Revert "Revert "Fixing issue #11: consistent use of ncolour and nsp""
This reverts commit 3af4929dda.
2022-11-23 19:40:28 +00:00
3af4929dda Revert "Fixing issue #11: consistent use of ncolour and nsp"
This reverts commit 1ba429345b.
2022-11-23 19:34:59 +00:00
1ba429345b Fixing issue #11: consistent use of ncolour and nsp 2022-11-23 18:45:01 +00:00
88bdd4344b 2indx antisymm representation of sp2n 2021-11-04 18:27:35 +00:00
4044536eea add projection on sp2n algebra 2021-10-26 10:20:44 +01:00
4d8ae6221c fix projection 2021-10-22 10:44:54 +01:00
4e31e4e094 Better tests 2021-10-13 15:07:23 +01:00
0d6674e489 hot start for sp2n 2021-10-12 18:53:54 +01:00
b145fd4f5b necessary to merge 2021-10-12 17:08:46 +01:00
8a5b794f25 necessary change to merge with upstrm 2021-10-12 16:04:03 +01:00
291e80f88a sp2n as config option 2021-10-12 16:00:32 +01:00
1ace5850ae first hmc 2021-10-12 16:00:32 +01:00
283f14b7c1 fix sp2n projection 2021-10-12 16:00:32 +01:00
1d6e708083 tests! 2021-10-12 16:00:32 +01:00
89457e25e3 sp fermion instantiation 2021-10-12 16:00:32 +01:00
7e3b298d3d project on sp2n 2021-10-12 16:00:32 +01:00
7ff3e5eed4 gauge and fermion implementation for sp2n 2021-10-12 16:00:32 +01:00
19eb51cf41 sp2n generators 2021-10-12 15:53:33 +01:00
470d4dcc6d sp2n as config option 2021-10-12 15:47:56 +01:00
ed03bfd555 first hmc 2021-10-12 12:16:47 +01:00
8c0fbcccae fix sp2n projection 2021-10-12 12:12:16 +01:00
d4866157fe tests! 2021-10-12 09:06:15 +01:00
b6496b6cb5 sp fermion instantiation 2021-10-11 16:32:10 +01:00
4f5fe57920 project on sp2n 2021-10-11 16:28:15 +01:00
11fb943b1e gauge and fermion implementation for sp2n 2021-10-11 16:21:25 +01:00
046a23121e sp2n generators 2021-10-05 15:51:22 +01:00
173 changed files with 11290 additions and 2365 deletions

.github/ISSUE_TEMPLATE/bug-report.yml vendored Normal file
View File

@@ -0,0 +1,54 @@
name: Bug report
description: Report a bug.
title: "<insert title>"
labels: [bug]
body:
  - type: markdown
    attributes:
      value: >
        Thank you for taking the time to file a bug report.
        Please check that the code is pointing to the HEAD of develop
        or any commit in master which is tagged with a version number.
  - type: textarea
    attributes:
      label: "Describe the issue:"
      description: >
        Describe the issue and any previous attempt to solve it.
    validations:
      required: true
  - type: textarea
    attributes:
      label: "Code example:"
      description: >
        If relevant, show how to reproduce the issue using a minimal working
        example.
      placeholder: |
        << your code here >>
      render: shell
    validations:
      required: false
  - type: textarea
    attributes:
      label: "Target platform:"
      description: >
        Give a description of the target platform (CPU, network, compiler).
        Please give the full CPU part description, using for example
        `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux)
        or `sysctl machdep.cpu.brand_string` (macOS) and the full output
        of the `--version` option of your compiler.
    validations:
      required: true
  - type: textarea
    attributes:
      label: "Configure options:"
      description: >
        Please give the exact configure command used and attach
        `config.log`, `grid.config.summary` and the output of `make V=1`.
      render: shell
    validations:
      required: true

View File

@@ -66,6 +66,10 @@ if BUILD_FERMION_REPS
extra_sources+=$(ADJ_FERMION_FILES)
extra_sources+=$(TWOIND_FERMION_FILES)
endif
if BUILD_SP
extra_sources+=$(SP_FERMION_FILES)
extra_sources+=$(SP_TWOIND_FERMION_FILES)
endif
lib_LIBRARIES = libGrid.a

View File

@@ -69,7 +69,8 @@ NAMESPACE_CHECK(BiCGSTAB);
#include <Grid/algorithms/iterative/PowerMethod.h>
NAMESPACE_CHECK(PowerMethod);
#include <Grid/algorithms/CoarsenedMatrix.h>
#include <Grid/algorithms/multigrid/MultiGrid.h>
NAMESPACE_CHECK(CoarsendMatrix);
#include <Grid/algorithms/FFT.h>

View File

@@ -90,9 +90,8 @@ public:
order=_order;
if(order < 2) exit(-1);
Coeffs.resize(order);
Coeffs.assign(0.,order);
Coeffs[order-1] = 1.;
Coeffs.resize(order,0.0);
Coeffs[order-1] = 1.0;
};
// PB - more efficient low pass drops high modes above the low as 1/x uses all Chebyshev's.
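The hunk above replaces Coeffs.assign(0.,order) with Coeffs.resize(order,0.0). A minimal sketch of why the old idiom was a bug (an illustration, not part of the patch): std::vector::assign(count, value) takes the count first, so assign(0., order) empties the vector and the following Coeffs[order-1] write is out of bounds.

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  const std::size_t order = 8;
  std::vector<double> Coeffs;
  Coeffs.assign(0., order);    // buggy form: count = 0, vector stays empty
  assert(Coeffs.empty());      // Coeffs[order-1] here would be undefined
  Coeffs.resize(order, 0.0);   // fixed form from the diff: size and zero-fill
  Coeffs[order - 1] = 1.0;     // highest coefficient, now a valid write
  assert(Coeffs.size() == order);
}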

View File

@@ -33,218 +33,254 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
* Script A = SolverMatrix
* Script P = Preconditioner
*
* Deflation methods considered
* -- Solve P A x = P b [ like Luscher ]
* DEF-1 M P A x = M P b [i.e. left precon]
* DEF-2 P^T M A x = P^T M b
* ADEF-1 Preconditioner = M P + Q [ = M + Q - M A Q ]
* ADEF-2 Preconditioner = P^T M + Q
* BNN Preconditioner = P^T M P + Q
* BNN2 Preconditioner = M P + P^T M + Q - M P A M
*
* Implement ADEF-2
*
* Vstart = P^T x + Q b
* M1 = P^T M + Q
* M2 = M3 = 1
* Vout = x
*/
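/*
 * A restatement of the table above in the usual two-level deflation
 * notation (an assumption on conventions -- the header leaves Q, P and E
 * implicit). With a coarse space spanned by the columns of Z:
 *
 *   E = Z^T A Z         coarse operator
 *   Q = Z E^{-1} Z^T    coarse solve promoted back to the fine grid
 *   P = 1 - A Q         deflation projector, so P^T = 1 - Q A for Hermitian A
 *
 * ADEF-2 then preconditions CG with M1 = P^T M + Q (M = smoother), and
 * ADEF-1 with M P + Q = M + Q - M A Q, matching the bracket above.
 */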
NAMESPACE_BEGIN(Grid);
// abstract base
template<class Field, class CoarseField>
class TwoLevelFlexiblePcg : public LinearFunction<Field>
template<class Field>
class TwoLevelCG : public LinearFunction<Field>
{
public:
int verbose;
RealD Tolerance;
Integer MaxIterations;
const int mmax = 5;
GridBase *grid;
GridBase *coarsegrid;
LinearOperatorBase<Field> *_Linop
OperatorFunction<Field> *_Smoother,
LinearFunction<CoarseField> *_CoarseSolver;
// Need something that knows how to get from Coarse to fine and back again
// Fine operator, Smoother, CoarseSolver
LinearOperatorBase<Field> &_FineLinop;
LinearFunction<Field> &_Smoother;
// more most operator functions
TwoLevelFlexiblePcg(RealD tol,
Integer maxit,
LinearOperatorBase<Field> *Linop,
LinearOperatorBase<Field> *SmootherLinop,
OperatorFunction<Field> *Smoother,
OperatorFunction<CoarseField> CoarseLinop
) :
TwoLevelCG(RealD tol,
Integer maxit,
LinearOperatorBase<Field> &FineLinop,
LinearFunction<Field> &Smoother,
GridBase *fine) :
Tolerance(tol),
MaxIterations(maxit),
_Linop(Linop),
_PreconditionerLinop(PrecLinop),
_Preconditioner(Preconditioner)
_FineLinop(FineLinop),
_Smoother(Smoother)
{
verbose=0;
grid = fine;
};
// The Pcg routine is common to all, but the various matrices differ from derived
// implementation to derived implementation
void operator() (const Field &src, Field &psi){
void operator() (const Field &src, Field &psi){
psi.Checkerboard() = src.Checkerboard();
grid = src.Grid();
virtual void operator() (const Field &src, Field &x)
{
Field resid(grid);
RealD f;
RealD rtzp,rtz,a,d,b;
RealD rptzp;
RealD tn;
RealD guess = norm2(psi);
RealD ssq = norm2(src);
RealD rsq = ssq*Tolerance*Tolerance;
/////////////////////////////
// Set up history vectors
/////////////////////////////
std::vector<Field> p (mmax,grid);
std::vector<Field> mmp(mmax,grid);
std::vector<RealD> pAp(mmax);
Field x (grid); x = psi;
Field z (grid);
Field p(grid);
Field z(grid);
Field tmp(grid);
Field mmp(grid);
Field r (grid);
Field mu (grid);
Field rp (grid);
//Initial residual computation & set up
double tn;
GridStopWatch HDCGTimer;
HDCGTimer.Start();
//////////////////////////
// x0 = Vstart -- possibly modify guess
//////////////////////////
x=src;
x=Zero();
Vstart(x,src);
// r0 = b -A x0
HermOp(x,mmp); // Shouldn't this be something else?
axpy (r, -1.0,mmp[0], src); // Recomputes r=src-Ax0
_FineLinop.HermOp(x,mmp);
axpy(r, -1.0, mmp, src); // Recomputes r = src - A x0
rp=r;
//////////////////////////////////
// Compute z = M1 x
//////////////////////////////////
M1(r,z,tmp,mp,SmootherMirs);
PcgM1(r,z);
rtzp =real(innerProduct(r,z));
///////////////////////////////////////
// Solve for Mss mu = P A z and set p = z-mu
// Def2: p = 1 - Q Az = Pright z
// Other algos M2 is trivial
// Except Def2, M2 is trivial
///////////////////////////////////////
M2(z,p[0]);
p=z;
for (int k=0;k<=MaxIterations;k++){
RealD ssq = norm2(src);
RealD rsq = ssq*Tolerance*Tolerance;
int peri_k = k % mmax;
int peri_kp = (k+1) % mmax;
std::cout<<GridLogMessage<<"HDCG: k=0 residual "<<rtzp<<" target rsq "<<rsq<<" ssq "<<ssq<<std::endl;
for (int k=1;k<=MaxIterations;k++){
rtz=rtzp;
d= M3(p[peri_k],mp,mmp[peri_k],tmp);
d= PcgM3(p,mmp);
a = rtz/d;
// Memorise this
pAp[peri_k] = d;
axpy(x,a,p,x);
RealD rn = axpy_norm(r,-a,mmp,r);
axpy(x,a,p[peri_k],x);
RealD rn = axpy_norm(r,-a,mmp[peri_k],r);
// Compute z = M x
M1(r,z,tmp,mp);
PcgM1(r,z);
rtzp =real(innerProduct(r,z));
M2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate
p[peri_kp]=p[peri_k];
// Standard search direction p -> z + b p ; b =
b = (rtzp)/rtz;
int northog;
// northog = (peri_kp==0)?1:peri_kp; // This is the fCG(mmax) algorithm
northog = (k>mmax-1)?(mmax-1):k; // This is the fCG-Tr(mmax-1) algorithm
for(int back=0; back < northog; back++){
int peri_back = (k-back)%mmax;
RealD pbApk= real(innerProduct(mmp[peri_back],p[peri_kp]));
RealD beta = -pbApk/pAp[peri_back];
axpy(p[peri_kp],beta,p[peri_back],p[peri_kp]);
int ipcg=1; // almost free inexact preconditioned CG
if (ipcg) {
rptzp =real(innerProduct(rp,z));
} else {
rptzp =0;
}
b = (rtzp-rptzp)/rtz;
PcgM2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate
axpy(p,b,p,mu); // mu = A r
RealD rrn=sqrt(rn/ssq);
std::cout<<GridLogMessage<<"TwoLevelfPcg: k= "<<k<<" residual = "<<rrn<<std::endl;
RealD rtn=sqrt(rtz/ssq);
std::cout<<GridLogMessage<<"HDCG: Pcg k= "<<k<<" residual = "<<rrn<<std::endl;
if ( ipcg ) {
axpy(rp,0.0,r,r);
}
// Stopping condition
if ( rn <= rsq ) {
HermOp(x,mmp); // Shouldn't this be something else?
axpy(tmp,-1.0,src,mmp[0]);
HDCGTimer.Stop();
std::cout<<GridLogMessage<<"HDCG: Pcg converged in "<<k<<" iterations and "<<HDCGTimer.Elapsed()<<std::endl;;
RealD psinorm = sqrt(norm2(x));
RealD srcnorm = sqrt(norm2(src));
RealD tmpnorm = sqrt(norm2(tmp));
RealD true_residual = tmpnorm/srcnorm;
std::cout<<GridLogMessage<<"TwoLevelfPcg: true residual is "<<true_residual<<std::endl;
std::cout<<GridLogMessage<<"TwoLevelfPcg: target residual was"<<Tolerance<<std::endl;
return k;
_FineLinop.HermOp(x,mmp);
axpy(tmp,-1.0,src,mmp);
RealD mmpnorm = sqrt(norm2(mmp));
RealD xnorm = sqrt(norm2(x));
RealD srcnorm = sqrt(norm2(src));
RealD tmpnorm = sqrt(norm2(tmp));
RealD true_residual = tmpnorm/srcnorm;
std::cout<<GridLogMessage
<<"HDCG: true residual is "<<true_residual
<<" solution "<<xnorm
<<" source "<<srcnorm
<<" mmp "<<mmpnorm
<<std::endl;
return;
}
}
// Non-convergence
assert(0);
std::cout<<GridLogMessage<<"HDCG: not converged"<<std::endl;
RealD xnorm = sqrt(norm2(x));
RealD srcnorm = sqrt(norm2(src));
std::cout<<GridLogMessage<<"HDCG: non-converged solution "<<xnorm<<" source "<<srcnorm<<std::endl;
return ;
}
public:
virtual void M(Field & in,Field & out,Field & tmp) {
virtual void PcgM1(Field & in, Field & out) =0;
virtual void Vstart(Field & x,const Field & src)=0;
virtual void PcgM2(const Field & in, Field & out) {
out=in;
}
virtual void M1(Field & in, Field & out) {// the smoother
virtual RealD PcgM3(const Field & p, Field & mmp){
RealD dd;
_FineLinop.HermOp(p,mmp);
ComplexD dot = innerProduct(p,mmp);
dd=real(dot);
return dd;
}
/////////////////////////////////////////////////////////////////////
// Only Def1 has non-trivial Vout.
/////////////////////////////////////////////////////////////////////
};
template<class Field, class CoarseField, class Aggregation>
class TwoLevelADEF2 : public TwoLevelCG<Field>
{
public:
///////////////////////////////////////////////////////////////////////////////////
// Need something that knows how to get from Coarse to fine and back again
// void ProjectToSubspace(CoarseVector &CoarseVec,const FineField &FineVec){
// void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){
///////////////////////////////////////////////////////////////////////////////////
GridBase *coarsegrid;
Aggregation &_Aggregates;
LinearFunction<CoarseField> &_CoarseSolver;
LinearFunction<CoarseField> &_CoarseSolverPrecise;
///////////////////////////////////////////////////////////////////////////////////
// more most operator functions
TwoLevelADEF2(RealD tol,
Integer maxit,
LinearOperatorBase<Field> &FineLinop,
LinearFunction<Field> &Smoother,
LinearFunction<CoarseField> &CoarseSolver,
LinearFunction<CoarseField> &CoarseSolverPrecise,
Aggregation &Aggregates
) :
TwoLevelCG<Field>(tol,maxit,FineLinop,Smoother,Aggregates.FineGrid),
_CoarseSolver(CoarseSolver),
_CoarseSolverPrecise(CoarseSolverPrecise),
_Aggregates(Aggregates)
{
coarsegrid = Aggregates.CoarseGrid;
};
virtual void PcgM1(Field & in, Field & out)
{
// [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
Field tmp(grid);
Field Min(grid);
PcgM(in,Min); // Smoother call
Field tmp(this->grid);
Field Min(this->grid);
CoarseField PleftProj(this->coarsegrid);
CoarseField PleftMss_proj(this->coarsegrid);
HermOp(Min,out);
GridStopWatch SmootherTimer;
GridStopWatch MatrixTimer;
SmootherTimer.Start();
this->_Smoother(in,Min);
SmootherTimer.Stop();
MatrixTimer.Start();
this->_FineLinop.HermOp(Min,out);
MatrixTimer.Stop();
axpy(tmp,-1.0,out,in); // tmp = in - A Min
ProjectToSubspace(tmp,PleftProj);
ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]
GridStopWatch ProjTimer;
GridStopWatch CoarseTimer;
GridStopWatch PromTimer;
ProjTimer.Start();
this->_Aggregates.ProjectToSubspace(PleftProj,tmp);
ProjTimer.Stop();
CoarseTimer.Start();
this->_CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
CoarseTimer.Stop();
PromTimer.Start();
this->_Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]
PromTimer.Stop();
std::cout << GridLogPerformance << "PcgM1 breakdown "<<std::endl;
std::cout << GridLogPerformance << "\tSmoother " << SmootherTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "\tProj " << ProjTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "\tCoarse " << CoarseTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "\tProm " << PromTimer.Elapsed() <<std::endl;
axpy(out,1.0,Min,tmp); // Min+tmp
}
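/*
 * Worked algebra for the comment at the top of PcgM1 (a sketch, assuming
 * P^T = 1 - Q A as above): writing Min = M in,
 *
 *   [P^T M + Q] in = M in - Q A M in + Q in = Min + Q ( in - A Min ),
 *
 * i.e. smooth, take the fine residual of the smoothed vector, solve it in
 * the coarse space, promote, and add -- exactly the sequence timed above.
 */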
virtual void M2(const Field & in, Field & out) {
out=in;
// Must override for Def2 only
// case PcgDef2:
// Pright(in,out);
// break;
}
virtual RealD M3(const Field & p, Field & mmp){
double d,dd;
HermOpAndNorm(p,mmp,d,dd);
return dd;
// Must override for Def1 only
// case PcgDef1:
// d=linop_d->Mprec(p,mmp,tmp,0,1);// Dag no
// linop_d->Mprec(mmp,mp,tmp,1);// Dag yes
// Pleft(mp,mmp);
// d=real(linop_d->inner(p,mmp));
}
virtual void VstartDef2(Field & x, const Field & src){
//case PcgDef2:
//case PcgAdef2:
//case PcgAdef2f:
//case PcgV11f:
virtual void Vstart(Field & x,const Field & src)
{
///////////////////////////////////
// Choose x_0 such that
// x_0 = guess + (A_ss^inv) r_s = guess + Ass_inv [src -Aguess]
@@ -256,142 +292,73 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
// = src_s - (A guess)_s - src_s + (A guess)_s
// = 0
///////////////////////////////////
Field r(grid);
Field mmp(grid);
Field r(this->grid);
Field mmp(this->grid);
CoarseField PleftProj(this->coarsegrid);
CoarseField PleftMss_proj(this->coarsegrid);
HermOp(x,mmp);
axpy (r, -1.0, mmp, src); // r_{-1} = src - A x
ProjectToSubspace(r,PleftProj);
ApplyInverseCG(PleftProj,PleftMss_proj); // Ass^{-1} r_s
PromoteFromSubspace(PleftMss_proj,mmp);
x=x+mmp;
this->_Aggregates.ProjectToSubspace(PleftProj,src);
this->_CoarseSolverPrecise(PleftProj,PleftMss_proj); // Ass^{-1} r_s
this->_Aggregates.PromoteFromSubspace(PleftMss_proj,x);
}
};
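/*
 * Why Vstart works (a sketch under the same assumed notation): with
 * guess = 0 the code sets x0 = Q b = Z E^{-1} Z^T b, so the coarse
 * projection of the initial residual vanishes,
 *
 *   Z^T ( b - A x0 ) = Z^T b - (Z^T A Z) E^{-1} Z^T b = 0,
 *
 * which is the "= 0" conclusion of the comment block inside Vstart.
 */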
template<class Field>
class TwoLevelADEF1defl : public TwoLevelCG<Field>
{
public:
const std::vector<Field> &evec;
const std::vector<RealD> &eval;
TwoLevelADEF1defl(RealD tol,
Integer maxit,
LinearOperatorBase<Field> &FineLinop,
LinearFunction<Field> &Smoother,
std::vector<Field> &_evec,
std::vector<RealD> &_eval) :
TwoLevelCG<Field>(tol,maxit,FineLinop,Smoother,_evec[0].Grid()),
evec(_evec),
eval(_eval)
{};
// Can just inherit existing M2
// Can just inherit existing M3
// Simple vstart - do nothing
virtual void Vstart(Field & x,const Field & src){
return;
x=src; // Could apply Q
};
// Override PcgM1
virtual void PcgM1(Field & in, Field & out)
{
int N=evec.size();
Field Pin(this->grid);
Field Qin(this->grid);
//MP + Q = M(1-AQ) + Q = M
// // If we are eigenvector deflating in coarse space
// // Q = Sum_i |phi_i> 1/lambda_i <phi_i|
// // A Q = Sum_i |phi_i> <phi_i|
// // M(1-AQ) = M(1-proj) + Q
Qin.Checkerboard()=in.Checkerboard();
Qin = Zero();
Pin = in;
for (int i=0;i<N;i++) {
const Field& tmp = evec[i];
auto ip = TensorRemove(innerProduct(tmp,in));
axpy(Qin, ip / eval[i],tmp,Qin);
axpy(Pin, -ip ,tmp,Pin);
}
this->_Smoother(Pin,out);
out = out + Qin;
}
};
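/*
 * The commented algebra in PcgM1 above, written out (a sketch; it assumes
 * the evec/eval pairs are exact, A phi_i = lambda_i phi_i):
 *
 *   Q   = sum_i phi_i (1/lambda_i) phi_i^dag
 *   A Q = sum_i phi_i phi_i^dag          (projector onto span{phi_i})
 *   M1  = M (1 - A Q) + Q  ->  out = Smoother(Pin) + Qin
 *
 * so Pin strips the deflated modes before smoothing and Qin solves them
 * exactly -- the loop over N eigenvectors implements both sums in one pass.
 */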
/////////////////////////////////////////////////////////////////////
// Only Def1 has non-trivial Vout. Override in Def1
/////////////////////////////////////////////////////////////////////
virtual void Vout (Field & in, Field & out,Field & src){
out = in;
//case PcgDef1:
// //Qb + PT x
// ProjectToSubspace(src,PleftProj);
// ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} r_s
// PromoteFromSubspace(PleftMss_proj,tmp);
//
// Pright(in,out);
//
// linop_d->axpy(out,tmp,out,1.0);
// break;
}
NAMESPACE_END(Grid);
////////////////////////////////////////////////////////////////////////////////////////////////
// Pright and Pleft are common to all implementations
////////////////////////////////////////////////////////////////////////////////////////////////
virtual void Pright(Field & in,Field & out){
// P_R = [ 1 0 ]
// [ -Mss^-1 Msb 0 ]
Field in_sbar(grid);
ProjectToSubspace(in,PleftProj);
PromoteFromSubspace(PleftProj,out);
axpy(in_sbar,-1.0,out,in); // in_sbar = in - in_s
HermOp(in_sbar,out);
ProjectToSubspace(out,PleftProj); // Mssbar in_sbar (project)
ApplyInverse (PleftProj,PleftMss_proj); // Mss^{-1} Mssbar
PromoteFromSubspace(PleftMss_proj,out); //
axpy(out,-1.0,out,in_sbar); // in_sbar - Mss^{-1} Mssbar in_sbar
}
virtual void Pleft (Field & in,Field & out){
// P_L = [ 1 -Mbs Mss^-1]
// [ 0 0 ]
Field in_sbar(grid);
Field tmp2(grid);
Field Mtmp(grid);
ProjectToSubspace(in,PleftProj);
PromoteFromSubspace(PleftProj,out);
axpy(in_sbar,-1.0,out,in); // in_sbar = in - in_s
ApplyInverse(PleftProj,PleftMss_proj); // Mss^{-1} in_s
PromoteFromSubspace(PleftMss_proj,out);
HermOp(out,Mtmp);
ProjectToSubspace(Mtmp,PleftProj); // Msbar s Mss^{-1}
PromoteFromSubspace(PleftProj,tmp2);
axpy(out,-1.0,tmp2,Mtmp);
axpy(out,-1.0,out,in_sbar); // in_sbar - Msbars Mss^{-1} in_s
}
}
template<class Field>
class TwoLevelFlexiblePcgADef2 : public TwoLevelFlexiblePcg<Field> {
public:
virtual void M(Field & in,Field & out,Field & tmp){
}
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp){
}
virtual void M2(Field & in, Field & out){
}
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp){
}
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp){
}
}
/*
template<class Field>
class TwoLevelFlexiblePcgAD : public TwoLevelFlexiblePcg<Field> {
public:
virtual void M(Field & in,Field & out,Field & tmp);
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
virtual void M2(Field & in, Field & out);
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
}
template<class Field>
class TwoLevelFlexiblePcgDef1 : public TwoLevelFlexiblePcg<Field> {
public:
virtual void M(Field & in,Field & out,Field & tmp);
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
virtual void M2(Field & in, Field & out);
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
virtual void Vout (Field & in, Field & out,Field & src,Field & tmp);
}
template<class Field>
class TwoLevelFlexiblePcgDef2 : public TwoLevelFlexiblePcg<Field> {
public:
virtual void M(Field & in,Field & out,Field & tmp);
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
virtual void M2(Field & in, Field & out);
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
}
template<class Field>
class TwoLevelFlexiblePcgV11: public TwoLevelFlexiblePcg<Field> {
public:
virtual void M(Field & in,Field & out,Field & tmp);
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
virtual void M2(Field & in, Field & out);
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
}
*/
#endif

View File

@@ -183,13 +183,13 @@ public:
<< "\tTrue residual " << true_residual
<< "\tTarget " << Tolerance << std::endl;
std::cout << GridLogMessage << "Time breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInner " << InnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "Time breakdown "<<std::endl;
std::cout << GridLogPerformance << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "\tInner " << InnerTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
std::cout << GridLogPerformance << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
std::cout << GridLogDebug << "\tMobius flop rate " << DwfFlops/ usecs<< " Gflops " <<std::endl;

View File

@@ -166,16 +166,16 @@ public:
rsqf[s] =rsq[s];
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrecCleanup: shift "<< s <<" target resid "<<rsq[s]<<std::endl;
// ps_d[s] = src_d;
precisionChangeFast(ps_f[s],src_d);
precisionChange(ps_f[s],src_d);
}
// r and p for primary
p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys
r_d = p_d;
//MdagM+m[0]
precisionChangeFast(p_f,p_d);
precisionChange(p_f,p_d);
Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
precisionChangeFast(tmp_d,mmp_f);
precisionChange(tmp_d,mmp_f);
Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
tmp_d = tmp_d - mmp_d;
std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
@@ -204,7 +204,7 @@ public:
for(int s=0;s<nshift;s++) {
axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d);
precisionChangeFast(psi_f[s],psi_d[s]);
precisionChange(psi_f[s],psi_d[s]);
}
///////////////////////////////////////
@@ -225,7 +225,7 @@ public:
AXPYTimer.Stop();
PrecChangeTimer.Start();
precisionChangeFast(r_f, r_d);
precisionChange(r_f, r_d);
PrecChangeTimer.Stop();
AXPYTimer.Start();
@@ -243,13 +243,13 @@ public:
cp=c;
PrecChangeTimer.Start();
precisionChangeFast(p_f, p_d); //get back single prec search direction for linop
precisionChange(p_f, p_d); //get back single prec search direction for linop
PrecChangeTimer.Stop();
MatrixTimer.Start();
Linop_f.HermOp(p_f,mmp_f);
MatrixTimer.Stop();
PrecChangeTimer.Start();
precisionChangeFast(mmp_d, mmp_f); // From Float to Double
precisionChange(mmp_d, mmp_f); // From Float to Double
PrecChangeTimer.Stop();
d=real(innerProduct(p_d,mmp_d));
@@ -311,7 +311,7 @@ public:
SolverTimer.Stop();
for(int s=0;s<nshift;s++){
precisionChangeFast(psi_d[s],psi_f[s]);
precisionChange(psi_d[s],psi_f[s]);
}

View File

@@ -211,7 +211,7 @@ public:
Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
tmp_d = tmp_d - mmp_d;
std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
// assert(norm2(tmp_d)< 1.0e-4);
assert(norm2(tmp_d)< 1.0);
axpy(mmp_d,mass[0],p_d,mmp_d);
RealD rn = norm2(p_d);

View File

@@ -419,14 +419,15 @@ until convergence
}
}
if ( Nconv < Nstop )
if ( Nconv < Nstop ) {
std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl;
std::cout << GridLogIRL << "returning Nstop vectors, the last "<< Nstop-Nconv << "of which might meet convergence criterion only approximately" <<std::endl;
}
eval=eval2;
//Keep only converged
eval.resize(Nconv);// Nstop?
evec.resize(Nconv,grid);// Nstop?
eval.resize(Nstop);// was Nconv
evec.resize(Nstop,grid);// was Nconv
basisSortInPlace(evec,eval,reverse);
}
@@ -456,7 +457,7 @@ until convergence
std::vector<Field>& evec,
Field& w,int Nm,int k)
{
std::cout<<GridLogIRL << "Lanczos step " <<k<<std::endl;
std::cout<<GridLogDebug << "Lanczos step " <<k<<std::endl;
const RealD tiny = 1.0e-20;
assert( k< Nm );
@@ -464,7 +465,7 @@ until convergence
Field& evec_k = evec[k];
_PolyOp(evec_k,w); std::cout<<GridLogIRL << "PolyOp" <<std::endl;
_PolyOp(evec_k,w); std::cout<<GridLogDebug << "PolyOp" <<std::endl;
if(k>0) w -= lme[k-1] * evec[k-1];
@@ -479,18 +480,18 @@ until convergence
lme[k] = beta;
if ( (k>0) && ( (k % orth_period) == 0 )) {
std::cout<<GridLogIRL << "Orthogonalising " <<k<<std::endl;
std::cout<<GridLogDebug << "Orthogonalising " <<k<<std::endl;
orthogonalize(w,evec,k); // orthonormalise
std::cout<<GridLogIRL << "Orthogonalised " <<k<<std::endl;
std::cout<<GridLogDebug << "Orthogonalised " <<k<<std::endl;
}
if(k < Nm-1) evec[k+1] = w;
std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
std::cout<<GridLogIRL << "Lanczos step alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
if ( beta < tiny )
std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl;
std::cout<<GridLogIRL << "Lanczos step complete " <<k<<std::endl;
std::cout<<GridLogDebug << "Lanczos step complete " <<k<<std::endl;
}
void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme,

View File

@@ -33,7 +33,7 @@ NAMESPACE_BEGIN(Grid);
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form an NE solver calling a Herm solver
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class NormalEquations {
template<class Field> class NormalEquations : public LinearFunction<Field>{
private:
SparseMatrixBase<Field> & _Matrix;
OperatorFunction<Field> & _HermitianSolver;
@@ -60,7 +60,7 @@ public:
}
};
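/*
 * The normal-equations trick wrapped here (standard CG folklore, not
 * specific to this diff): a general matrix M becomes a Hermitian positive
 * definite problem by solving
 *
 *   M^dag M x = M^dag b
 *
 * with a Hermitian solver such as CG. HPDSolver below skips the M^dag step
 * because its operator is assumed Hermitian positive definite already.
 */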
template<class Field> class HPDSolver {
template<class Field> class HPDSolver : public LinearFunction<Field> {
private:
LinearOperatorBase<Field> & _Matrix;
OperatorFunction<Field> & _HermitianSolver;
@@ -78,13 +78,13 @@ public:
void operator() (const Field &in, Field &out){
_Guess(in,out);
_HermitianSolver(_Matrix,in,out); // Mdag M out = Mdag in
_HermitianSolver(_Matrix,in,out); //M out = in
}
};
template<class Field> class MdagMSolver {
template<class Field> class MdagMSolver : public LinearFunction<Field> {
private:
SparseMatrixBase<Field> & _Matrix;
OperatorFunction<Field> & _HermitianSolver;

View File

@@ -20,7 +20,7 @@ template<class Field> class PowerMethod
RealD evalMaxApprox = 0.0;
auto src_n = src;
auto tmp = src;
const int _MAX_ITER_EST_ = 50;
const int _MAX_ITER_EST_ = 100;
for (int i=0;i<_MAX_ITER_EST_;i++) {

View File

@@ -0,0 +1,262 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/Aggregates.h
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
template<class Fobj,class CComplex,int nbasis>
class Aggregation {
public:
typedef iVector<CComplex,nbasis > siteVector;
typedef Lattice<siteVector> CoarseVector;
typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
typedef Lattice<Fobj > FineField;
GridBase *CoarseGrid;
GridBase *FineGrid;
std::vector<Lattice<Fobj> > subspace;
int checkerboard;
int Checkerboard(void){return checkerboard;}
Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :
CoarseGrid(_CoarseGrid),
FineGrid(_FineGrid),
subspace(nbasis,_FineGrid),
checkerboard(_checkerboard)
{
};
void Orthogonalise(void){
CoarseScalar InnerProd(CoarseGrid);
// std::cout << GridLogMessage <<" Block Gram-Schmidt pass 1"<<std::endl;
blockOrthogonalise(InnerProd,subspace);
}
void ProjectToSubspace(CoarseVector &CoarseVec,const FineField &FineVec){
blockProject(CoarseVec,FineVec,subspace);
}
void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){
FineVec.Checkerboard() = subspace[0].Checkerboard();
blockPromote(CoarseVec,FineVec,subspace);
}
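/*
 * Restriction and promotion in matrix language (a sketch): with the
 * block-orthonormalised subspace vectors z_b as columns of Z,
 *
 *   ProjectToSubspace:   c = Z^dag f   (blockProject,  fine -> coarse)
 *   PromoteFromSubspace: f = Z c       (blockPromote, coarse -> fine)
 *
 * applied block-locally, so each coarse site carries nbasis components.
 */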
virtual void CreateSubspaceRandom(GridParallelRNG &RNG) {
int nn=nbasis;
RealD scale;
FineField noise(FineGrid);
for(int b=0;b<nn;b++){
subspace[b] = Zero();
gaussian(RNG,noise);
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
subspace[b] = noise;
}
}
virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis)
{
RealD scale;
ConjugateGradient<FineField> CG(1.0e-2,100,false);
FineField noise(FineGrid);
FineField Mn(FineGrid);
for(int b=0;b<nn;b++){
subspace[b] = Zero();
gaussian(RNG,noise);
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl;
for(int i=0;i<1;i++){
CG(hermop,noise,subspace[b]);
noise = subspace[b];
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
}
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(Mn)<<std::endl;
subspace[b] = noise;
}
}
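/*
 * What this setup loop does, restated (a sketch assuming hermop is
 * Hermitian positive definite): each loose CG solve is one step of
 * inverse iteration,
 *
 *   v  <-  normalise( A^{-1} v ),
 *
 * which scales each eigencomponent by 1/lambda and so enriches the low
 * modes the coarse space needs to capture; the <n|MdagM|n> prints track
 * how much the filtering suppressed the high end.
 */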
////////////////////////////////////////////////////////////////////////////////////////////////
// World of possibilities here. But have tried quite a lot of experiments (250+ jobs run on Summit)
// and this is the best I found
////////////////////////////////////////////////////////////////////////////////////////////////
virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,
int nn,
double hi,
double lo,
int orderfilter,
int ordermin,
int orderstep,
double filterlo
) {
RealD scale;
FineField noise(FineGrid);
FineField Mn(FineGrid);
FineField tmp(FineGrid);
// New normalised noise
gaussian(RNG,noise);
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
std::cout << GridLogMessage<<" Chebyshev subspace pass-1 : ord "<<orderfilter<<" ["<<lo<<","<<hi<<"]"<<std::endl;
std::cout << GridLogMessage<<" Chebyshev subspace pass-2 : nbasis"<<nn<<" min "
<<ordermin<<" step "<<orderstep
<<" lo"<<filterlo<<std::endl;
// Initial matrix element
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
int b =0;
{
// Filter
Chebyshev<FineField> Cheb(lo,hi,orderfilter);
Cheb(hermop,noise,Mn);
// normalise
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
subspace[b] = Mn;
hermop.Op(Mn,tmp);
std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
b++;
}
// Generate a full sequence of Chebyshevs
{
lo=filterlo;
noise=Mn;
FineField T0(FineGrid); T0 = noise;
FineField T1(FineGrid);
FineField T2(FineGrid);
FineField y(FineGrid);
FineField *Tnm = &T0;
FineField *Tn = &T1;
FineField *Tnp = &T2;
// Tn=T1 = (xscale M + mscale)in
RealD xscale = 2.0/(hi-lo);
RealD mscale = -(hi+lo)/(hi-lo);
hermop.HermOp(T0,y);
T1=y*xscale+noise*mscale;
for(int n=2;n<=ordermin+orderstep*(nn-2);n++){
hermop.HermOp(*Tn,y);
autoView( y_v , y, AcceleratorWrite);
autoView( Tn_v , (*Tn), AcceleratorWrite);
autoView( Tnp_v , (*Tnp), AcceleratorWrite);
autoView( Tnm_v , (*Tnm), AcceleratorWrite);
const int Nsimd = CComplex::Nsimd();
accelerator_for(ss, FineGrid->oSites(), Nsimd, {
coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
});
// Possibly more fine-grained control is needed than a linear sweep,
// but there is a huge productivity gain if this stays a simple algorithm and not a tunable
int m =1;
if ( n>=ordermin ) m=n-ordermin;
if ( (m%orderstep)==0 ) {
Mn=*Tnp;
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
subspace[b] = Mn;
hermop.Op(Mn,tmp);
std::cout<<GridLogMessage << n<<" filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
b++;
}
// Cycle pointers to avoid copies
FineField *swizzle = Tnm;
Tnm =Tn;
Tn =Tnp;
Tnp =swizzle;
}
}
assert(b==nn);
}
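////////////////////////////////////////////////////////////////////////////////////////////////
// NB: the loop above is the standard three-term Chebyshev recurrence on the shifted operator
// x = xscale*M + mscale, which maps the spectral window [lo,hi] onto [-1,1]:
//   T_0 = in,   T_1 = x T_0,   T_{n+1} = 2 x T_n - T_{n-1}
// Every orderstep-th polynomial beyond ordermin is kept, so the basis consists of Chebyshev
// filters of increasing order applied to the same pass-1 filtered noise vector.
////////////////////////////////////////////////////////////////////////////////////////////////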
virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,
int nn,
double hi,
double lo,
int orderfilter
) {
RealD scale;
FineField noise(FineGrid);
FineField Mn(FineGrid);
FineField tmp(FineGrid);
// New normalised noise
std::cout << GridLogMessage<<" Chebyshev subspace pure noise : ord "<<orderfilter<<" ["<<lo<<","<<hi<<"]"<<std::endl;
std::cout << GridLogMessage<<" Chebyshev subspace pure noise : nbasis "<<nn<<std::endl;
for(int b =0;b<nn;b++)
{
gaussian(RNG,noise);
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
// Initial matrix element
hermop.Op(noise,Mn);
if(b==0) std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
// Filter
Chebyshev<FineField> Cheb(lo,hi,orderfilter);
Cheb(hermop,noise,Mn);
// normalise
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
subspace[b] = Mn;
hermop.Op(Mn,tmp);
std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
}
}
};
NAMESPACE_END(Grid);
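A minimal usage sketch of the Aggregation class above (the grids, the HermOp linear operator and the concrete parameter values are illustrative assumptions, not part of this header):

  // Sketch: build and orthogonalise a near-null subspace on a 16^4 fine grid blocked to 4^4
  const int nbasis = 40;                                  // assumed compile-time basis size
  typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
  GridCartesian *FineGrid   = SpaceTimeGrid::makeFourDimGrid(Coordinate({16,16,16,16}),
                                 GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
  GridCartesian *CoarseGrid = SpaceTimeGrid::makeFourDimGrid(Coordinate({4,4,4,4}),
                                 GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
  GridParallelRNG RNG(FineGrid);  RNG.SeedFixedIntegers(std::vector<int>({1,2,3,4}));

  Subspace Aggregates(CoarseGrid,FineGrid,0);             // checkerboard 0
  Aggregates.CreateSubspaceChebyshev(RNG,HermOp,nbasis,90.0,0.1,300); // pure-noise overload
  Aggregates.Orthogonalise();                             // block Gram-Schmidt over coarse cells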


@ -56,243 +56,6 @@ inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner,
blockSum(CoarseInner,fine_inner_msk);
}
class Geometry {
public:
int npoint;
int base;
std::vector<int> directions ;
std::vector<int> displacements;
std::vector<int> points_dagger;
Geometry(int _d) {
base = (_d==5) ? 1:0;
// make coarse grid stencil for 4d , not 5d
if ( _d==5 ) _d=4;
npoint = 2*_d+1;
directions.resize(npoint);
displacements.resize(npoint);
points_dagger.resize(npoint);
for(int d=0;d<_d;d++){
directions[d ] = d+base;
directions[d+_d] = d+base;
displacements[d ] = +1;
displacements[d+_d]= -1;
points_dagger[d ] = d+_d;
points_dagger[d+_d] = d;
}
directions [2*_d]=0;
displacements[2*_d]=0;
points_dagger[2*_d]=2*_d;
}
int point(int dir, int disp) {
assert(disp == -1 || disp == 0 || disp == 1);
assert(base+0 <= dir && dir < base+4);
// directions faster index = new indexing
// 4d (base = 0):
// point 0 1 2 3 4 5 6 7 8
// dir 0 1 2 3 0 1 2 3 0
// disp +1 +1 +1 +1 -1 -1 -1 -1 0
// 5d (base = 1):
// point 0 1 2 3 4 5 6 7 8
// dir 1 2 3 4 1 2 3 4 0
// disp +1 +1 +1 +1 -1 -1 -1 -1 0
// displacements faster index = old indexing
// 4d (base = 0):
// point 0 1 2 3 4 5 6 7 8
// dir 0 0 1 1 2 2 3 3 0
// disp +1 -1 +1 -1 +1 -1 +1 -1 0
// 5d (base = 1):
// point 0 1 2 3 4 5 6 7 8
// dir 1 1 2 2 3 3 4 4 0
// disp +1 -1 +1 -1 +1 -1 +1 -1 0
if(dir == 0 and disp == 0)
return 8;
else // New indexing
return (1 - disp) / 2 * 4 + dir - base;
// else // Old indexing
// return (4 * (dir - base) + 1 - disp) / 2;
}
};
template<class Fobj,class CComplex,int nbasis>
class Aggregation {
public:
typedef iVector<CComplex,nbasis > siteVector;
typedef Lattice<siteVector> CoarseVector;
typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
typedef Lattice<Fobj > FineField;
GridBase *CoarseGrid;
GridBase *FineGrid;
std::vector<Lattice<Fobj> > subspace;
int checkerboard;
int Checkerboard(void){return checkerboard;}
Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :
CoarseGrid(_CoarseGrid),
FineGrid(_FineGrid),
subspace(nbasis,_FineGrid),
checkerboard(_checkerboard)
{
};
void Orthogonalise(void){
CoarseScalar InnerProd(CoarseGrid);
std::cout << GridLogMessage <<" Block Gramm-Schmidt pass 1"<<std::endl;
blockOrthogonalise(InnerProd,subspace);
}
void ProjectToSubspace(CoarseVector &CoarseVec,const FineField &FineVec){
blockProject(CoarseVec,FineVec,subspace);
}
void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){
FineVec.Checkerboard() = subspace[0].Checkerboard();
blockPromote(CoarseVec,FineVec,subspace);
}
virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
RealD scale;
ConjugateGradient<FineField> CG(1.0e-2,100,false);
FineField noise(FineGrid);
FineField Mn(FineGrid);
for(int b=0;b<nn;b++){
subspace[b] = Zero();
gaussian(RNG,noise);
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl;
for(int i=0;i<1;i++){
CG(hermop,noise,subspace[b]);
noise = subspace[b];
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
}
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(Mn)<<std::endl;
subspace[b] = noise;
}
}
////////////////////////////////////////////////////////////////////////////////////////////////
// There is a world of possibilities here, but quite a lot of experiments have been tried
// (250+ jobs run on Summit) and this is the best scheme found
////////////////////////////////////////////////////////////////////////////////////////////////
virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,
int nn,
double hi,
double lo,
int orderfilter,
int ordermin,
int orderstep,
double filterlo
) {
RealD scale;
FineField noise(FineGrid);
FineField Mn(FineGrid);
FineField tmp(FineGrid);
// New normalised noise
gaussian(RNG,noise);
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
// Initial matrix element
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
int b =0;
{
// Filter
Chebyshev<FineField> Cheb(lo,hi,orderfilter);
Cheb(hermop,noise,Mn);
// normalise
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
subspace[b] = Mn;
hermop.Op(Mn,tmp);
std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
b++;
}
// Generate a full sequence of Chebyshevs
{
lo=filterlo;
noise=Mn;
FineField T0(FineGrid); T0 = noise;
FineField T1(FineGrid);
FineField T2(FineGrid);
FineField y(FineGrid);
FineField *Tnm = &T0;
FineField *Tn = &T1;
FineField *Tnp = &T2;
// Tn=T1 = (xscale M + mscale)in
RealD xscale = 2.0/(hi-lo);
RealD mscale = -(hi+lo)/(hi-lo);
hermop.HermOp(T0,y);
T1=y*xscale+noise*mscale;
for(int n=2;n<=ordermin+orderstep*(nn-2);n++){
hermop.HermOp(*Tn,y);
autoView( y_v , y, AcceleratorWrite);
autoView( Tn_v , (*Tn), AcceleratorWrite);
autoView( Tnp_v , (*Tnp), AcceleratorWrite);
autoView( Tnm_v , (*Tnm), AcceleratorWrite);
const int Nsimd = CComplex::Nsimd();
accelerator_for(ss, FineGrid->oSites(), Nsimd, {
coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
});
// Possibly more fine-grained control is needed than a linear sweep,
// but there is a huge productivity gain if this stays a simple algorithm and not a tunable
int m =1;
if ( n>=ordermin ) m=n-ordermin;
if ( (m%orderstep)==0 ) {
Mn=*Tnp;
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
subspace[b] = Mn;
hermop.Op(Mn,tmp);
std::cout<<GridLogMessage << n<<" filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
b++;
}
// Cycle pointers to avoid copies
FineField *swizzle = Tnm;
Tnm =Tn;
Tn =Tnp;
Tnp =swizzle;
}
}
assert(b==nn);
}
};
// Fine Object == (per site) type of fine field
// nbasis == number of deflation vectors
template<class Fobj,class CComplex,int nbasis>


@ -0,0 +1,449 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/GeneralCoarsenedMatrix.h
Copyright (C) 2015
Author: Peter Boyle <pboyle@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/QCD.h> // needed for Dagger(Yes|No), Inverse(Yes|No)
#include <Grid/lattice/PaddedCell.h>
#include <Grid/stencil/GeneralLocalStencil.h>
NAMESPACE_BEGIN(Grid);
// Fine Object == (per site) type of fine field
// nbasis == number of deflation vectors
template<class Fobj,class CComplex,int nbasis>
class GeneralCoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > > {
public:
typedef GeneralCoarsenedMatrix<Fobj,CComplex,nbasis> GeneralCoarseOp;
typedef iVector<CComplex,nbasis > siteVector;
typedef iMatrix<CComplex,nbasis > siteMatrix;
typedef Lattice<iScalar<CComplex> > CoarseComplexField;
typedef Lattice<siteVector> CoarseVector;
typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
typedef iMatrix<CComplex,nbasis > Cobj;
typedef iVector<CComplex,nbasis > Cvec;
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
typedef Lattice<Fobj > FineField;
typedef CoarseVector Field;
////////////////////
// Data members
////////////////////
int hermitian;
GridBase * _FineGrid;
GridCartesian * _CoarseGrid;
NonLocalStencilGeometry &geom;
PaddedCell Cell;
GeneralLocalStencil Stencil;
std::vector<CoarseMatrix> _A;
std::vector<CoarseMatrix> _Adag;
std::vector<CoarseVector> MultTemporaries;
///////////////////////
// Interface
///////////////////////
GridBase * Grid(void) { return _FineGrid; }; // this is all the linalg routines need to know
GridBase * FineGrid(void) { return _FineGrid; }; // this is all the linalg routines need to know
GridCartesian * CoarseGrid(void) { return _CoarseGrid; }; // this is all the linalg routines need to know
void ProjectNearestNeighbour(RealD shift, GeneralCoarseOp &CopyMe)
{
int nfound=0;
std::cout << GridLogMessage <<"GeneralCoarsenedMatrix::ProjectNearestNeighbour "<< CopyMe._A[0].Grid()<<std::endl;
for(int p=0;p<geom.npoint;p++){
for(int pp=0;pp<CopyMe.geom.npoint;pp++){
// Search for the same relative shift
// Avoids brutal handling of Grid pointers
if ( CopyMe.geom.shifts[pp]==geom.shifts[p] ) {
_A[p] = CopyMe.Cell.Extract(CopyMe._A[pp]);
_Adag[p] = CopyMe.Cell.Extract(CopyMe._Adag[pp]);
nfound++;
}
}
}
assert(nfound==geom.npoint);
ExchangeCoarseLinks();
}
GeneralCoarsenedMatrix(NonLocalStencilGeometry &_geom,GridBase *FineGrid, GridCartesian * CoarseGrid)
: geom(_geom),
_FineGrid(FineGrid),
_CoarseGrid(CoarseGrid),
hermitian(1),
Cell(_geom.Depth(),_CoarseGrid),
Stencil(Cell.grids.back(),geom.shifts)
{
{
int npoint = _geom.npoint;
autoView( Stencil_v , Stencil, AcceleratorRead);
int osites=Stencil.Grid()->oSites();
for(int ss=0;ss<osites;ss++){
for(int point=0;point<npoint;point++){
auto SE = Stencil_v.GetEntry(point,ss);
int o = SE->_offset;
assert( o< osites);
}
}
}
_A.resize(geom.npoint,CoarseGrid);
_Adag.resize(geom.npoint,CoarseGrid);
}
void M (const CoarseVector &in, CoarseVector &out)
{
Mult(_A,in,out);
}
void Mdag (const CoarseVector &in, CoarseVector &out)
{
if ( hermitian ) M(in,out);
else Mult(_Adag,in,out);
}
void Mult (std::vector<CoarseMatrix> &A,const CoarseVector &in, CoarseVector &out)
{
RealD tviews=0; RealD ttot=0; RealD tmult=0; RealD texch=0; RealD text=0; RealD ttemps=0; RealD tcopy=0;
ttot=-usecond();
conformable(CoarseGrid(),in.Grid());
conformable(in.Grid(),out.Grid());
out.Checkerboard() = in.Checkerboard();
CoarseVector tin=in;
texch-=usecond();
CoarseVector pin = Cell.ExchangePeriodic(tin);
texch+=usecond();
CoarseVector pout(pin.Grid());
int npoint = geom.npoint;
typedef LatticeView<Cobj> Aview;
typedef LatticeView<Cvec> Vview;
const int Nsimd = CComplex::Nsimd();
int64_t osites=pin.Grid()->oSites();
RealD flops = 1.0* npoint * nbasis * nbasis * 8.0 * osites * CComplex::Nsimd();
RealD bytes = 1.0*osites*sizeof(siteMatrix)*npoint
+ 2.0*osites*sizeof(siteVector)*npoint;
{
tviews-=usecond();
autoView( in_v , pin, AcceleratorRead);
autoView( out_v , pout, AcceleratorWriteDiscard);
autoView( Stencil_v , Stencil, AcceleratorRead);
tviews+=usecond();
// Static and prereserve to keep UVM region live and not resized across multiple calls
ttemps-=usecond();
MultTemporaries.resize(npoint,pin.Grid());
ttemps+=usecond();
std::vector<Aview> AcceleratorViewContainer_h;
std::vector<Vview> AcceleratorVecViewContainer_h;
tviews-=usecond();
for(int p=0;p<npoint;p++) {
AcceleratorViewContainer_h.push_back( A[p].View(AcceleratorRead));
AcceleratorVecViewContainer_h.push_back(MultTemporaries[p].View(AcceleratorWrite));
}
tviews+=usecond();
static deviceVector<Aview> AcceleratorViewContainer; AcceleratorViewContainer.resize(npoint);
static deviceVector<Vview> AcceleratorVecViewContainer; AcceleratorVecViewContainer.resize(npoint);
auto Aview_p = &AcceleratorViewContainer[0];
auto Vview_p = &AcceleratorVecViewContainer[0];
tcopy-=usecond();
acceleratorCopyToDevice(&AcceleratorViewContainer_h[0],&AcceleratorViewContainer[0],npoint *sizeof(Aview));
acceleratorCopyToDevice(&AcceleratorVecViewContainer_h[0],&AcceleratorVecViewContainer[0],npoint *sizeof(Vview));
tcopy+=usecond();
tmult-=usecond();
accelerator_for(spb, osites*nbasis*npoint, Nsimd, {
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
int32_t ss = spb/(nbasis*npoint);
int32_t bp = spb%(nbasis*npoint);
int32_t b = bp/npoint;
int32_t point= bp%npoint;
auto SE = Stencil_v.GetEntry(point,ss);
auto nbr = coalescedReadGeneralPermute(in_v[SE->_offset],SE->_permute,Nd);
auto res = coalescedRead(Aview_p[point][ss](b,0))*nbr(0);
for(int bb=1;bb<nbasis;bb++) {
res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
}
coalescedWrite(Vview_p[point][ss](b),res);
});
accelerator_for(sb, osites*nbasis, Nsimd, {
int ss = sb/nbasis;
int b = sb%nbasis;
auto res = coalescedRead(Vview_p[0][ss](b));
for(int point=1;point<npoint;point++){
res = res + coalescedRead(Vview_p[point][ss](b));
}
coalescedWrite(out_v[ss](b),res);
});
tmult+=usecond();
for(int p=0;p<npoint;p++) {
AcceleratorViewContainer_h[p].ViewClose();
AcceleratorVecViewContainer_h[p].ViewClose();
}
}
text-=usecond();
out = Cell.Extract(pout);
text+=usecond();
ttot+=usecond();
std::cout << GridLogPerformance<<"Coarse Mult Aviews "<<tviews<<" us"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Mult exch "<<texch<<" us"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Mult mult "<<tmult<<" us"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Mult ext "<<text<<" us"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Mult temps "<<ttemps<<" us"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Mult copy "<<tcopy<<" us"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Mult tot "<<ttot<<" us"<<std::endl;
// std::cout << GridLogPerformance<<std::endl;
// std::cout << GridLogPerformance<<"Coarse Kernel flop/s "<< flops/tmult<<" mflop/s"<<std::endl;
// std::cout << GridLogPerformance<<"Coarse Kernel bytes/s"<< bytes/tmult<<" MB/s"<<std::endl;
// std::cout << GridLogPerformance<<"Coarse overall flops/s "<< flops/ttot<<" mflop/s"<<std::endl;
// std::cout << GridLogPerformance<<"Coarse total bytes "<< bytes/1e6<<" MB"<<std::endl;
};
void PopulateAdag(void)
{
for(int64_t bidx=0;bidx<CoarseGrid()->gSites() ;bidx++){
Coordinate bcoor;
CoarseGrid()->GlobalIndexToGlobalCoor(bidx,bcoor);
for(int p=0;p<geom.npoint;p++){
Coordinate scoor = bcoor;
for(int mu=0;mu<bcoor.size();mu++){
int L = CoarseGrid()->GlobalDimensions()[mu];
scoor[mu] = (bcoor[mu] - geom.shifts[p][mu] + L) % L; // Modulo arithmetic
}
// Could flip to poke/peekLocalSite for speed; as setup-only code this is not too bad
auto link = peekSite(_A[p],scoor);
int pp = geom.Reverse(p);
pokeSite(adj(link),_Adag[pp],bcoor);
}
}
}
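///////////////////////////////////////////////////////////////////////
// i.e. for each point p with shift delta_p the daggered links satisfy
//    Adag[Reverse(p)](x) = adj( A[p](x - delta_p) )
// so the global loop above builds the conjugate-transpose of the stencil.
///////////////////////////////////////////////////////////////////////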
/////////////////////////////////////////////////////////////
//
// A) The only reduced-flops option is to use a padded cell of depth 4
// and apply MpcDagMpc in the padded cell.
//
// Makes for ONE application of MpcDagMpc per vector instead of 30 or 80.
// With an effective cell size around (B+8)^4, perhaps a 12^4/4^4 ratio,
// the cost is 81x more, the same as the stencil size.
//
// But: comms can be eliminated and the work done as a local Dirichlet problem.
//
// Exchange the gauge field locally once.
// Apply to all vectors; local-only computation.
// Must exchange ghost subcells, the reverse process of PaddedCell, to take inner products.
//
// B) Can reduce cost: pad by 1, apply Deo (4^4+6^4+8^4+8^4 )/ (4x 4^4)
//    pad by 2, apply Doe
//    pad by 3, apply Deo
//    then break out 8x directions; cost is ~10x MpcDagMpc per vector
//
// => almost a factor of 10 reduction in setup cost, excluding data rearrangement
//
// Intermediates -- ignore the corner terms, leave approximate and force Hermitian
// Intermediates -- pad by 2 and apply 1+8+24 = 33 times.
/////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////
// BFM HDCG style approach: Solve a system of equations to get Aij
//////////////////////////////////////////////////////////
/*
* Here, k,l index which possible shift within the 3^Nd "ball" connected by MdagM.
*
* conj(phases[block]) proj[k][ block*Nvec+j ] = \sum_ball e^{i q_k . delta} < phi_{block,j} | MdagM | phi_{(block+delta),i} >
*                                             = \sum_ball e^{i q_k . delta} A_ji
*
* Must invert the matrix M_kl = e^{i q_k . delta_l}
*
* where q_k = delta_k . (2*M_PI/global_nb[mu])
*/
void CoarsenOperator(LinearOperatorBase<Lattice<Fobj> > &linop,
Aggregation<Fobj,CComplex,nbasis> & Subspace)
{
std::cout << GridLogMessage<< "GeneralCoarsenMatrix "<< std::endl;
GridBase *grid = FineGrid();
RealD tproj=0.0;
RealD teigen=0.0;
RealD tmat=0.0;
RealD tphase=0.0;
RealD tinv=0.0;
/////////////////////////////////////////////////////////////
// Orthogonalise the subblocks over the basis
/////////////////////////////////////////////////////////////
CoarseScalar InnerProd(CoarseGrid());
blockOrthogonalise(InnerProd,Subspace.subspace);
const int npoint = geom.npoint;
Coordinate clatt = CoarseGrid()->GlobalDimensions();
int Nd = CoarseGrid()->Nd();
/*
* Here, k,l index which possible momentum/shift within the N-points connected by MdagM.
* Matrix index i is mapped to this shift via
* geom.shifts[i]
*
* conj(pha[block]) proj[k (which mom)][j (basis vec cpt)][block]
* = \sum_{l in ball} e^{i q_k . delta_l} < phi_{block,j} | MdagM | phi_{(block+delta_l),i} >
* = \sum_{l in ball} e^{iqk.delta_l} A_ji^{b.b+l}
* = M_{kl} A_ji^{b.b+l}
*
* Must assemble and invert the matrix M_kl = e^{i q_k . delta_l}
*
* Where q_k = delta_k . (2*M_PI/global_nb[mu])
*
* Then A{ji}^{b,b+l} = M^{-1}_{lm} ComputeProj_{m,b,i,j}
*/
teigen-=usecond();
Eigen::MatrixXcd Mkl = Eigen::MatrixXcd::Zero(npoint,npoint);
Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint);
ComplexD ci(0.0,1.0);
for(int k=0;k<npoint;k++){ // Loop over momenta
for(int l=0;l<npoint;l++){ // Loop over nbr relative
ComplexD phase(0.0,0.0);
for(int mu=0;mu<Nd;mu++){
RealD TwoPiL = M_PI * 2.0/ clatt[mu];
phase=phase+TwoPiL*geom.shifts[k][mu]*geom.shifts[l][mu];
}
phase=exp(phase*ci);
Mkl(k,l) = phase;
}
}
invMkl = Mkl.inverse();
teigen+=usecond();
///////////////////////////////////////////////////////////////////////
// Now compute the matrix elements of linop between the orthonormal
// set of vectors.
///////////////////////////////////////////////////////////////////////
FineField phaV(grid); // Phased block basis vector
FineField MphaV(grid);// Matrix applied
CoarseVector coarseInner(CoarseGrid());
std::vector<CoarseVector> ComputeProj(npoint,CoarseGrid());
std::vector<CoarseVector> FT(npoint,CoarseGrid());
for(int i=0;i<nbasis;i++){// Loop over basis vectors
std::cout << GridLogMessage<< "CoarsenMatrixColoured vec "<<i<<"/"<<nbasis<< std::endl;
for(int p=0;p<npoint;p++){ // Loop over momenta in npoint
/////////////////////////////////////////////////////
// Stick a phase on every block
/////////////////////////////////////////////////////
tphase-=usecond();
CoarseComplexField coor(CoarseGrid());
CoarseComplexField pha(CoarseGrid()); pha=Zero();
for(int mu=0;mu<Nd;mu++){
LatticeCoordinate(coor,mu);
RealD TwoPiL = M_PI * 2.0/ clatt[mu];
pha = pha + (TwoPiL * geom.shifts[p][mu]) * coor;
}
pha =exp(pha*ci);
phaV=Zero();
blockZAXPY(phaV,pha,Subspace.subspace[i],phaV);
tphase+=usecond();
/////////////////////////////////////////////////////////////////////
// Multiple phased subspace vector by matrix and project to subspace
// Remove local bulk phase to leave relative phases
/////////////////////////////////////////////////////////////////////
tmat-=usecond();
linop.Op(phaV,MphaV);
tmat+=usecond();
tproj-=usecond();
blockProject(coarseInner,MphaV,Subspace.subspace);
coarseInner = conjugate(pha) * coarseInner;
ComputeProj[p] = coarseInner;
tproj+=usecond();
}
tinv-=usecond();
for(int k=0;k<npoint;k++){
FT[k] = Zero();
for(int l=0;l<npoint;l++){
FT[k]= FT[k]+ invMkl(l,k)*ComputeProj[l];
}
int osites=CoarseGrid()->oSites();
autoView( A_v , _A[k], AcceleratorWrite);
autoView( FT_v , FT[k], AcceleratorRead);
accelerator_for(sss, osites, 1, {
for(int j=0;j<nbasis;j++){
A_v[sss](j,i) = FT_v[sss](j);
}
});
}
tinv+=usecond();
}
for(int p=0;p<geom.npoint;p++){
Coordinate coor({0,0,0,0,0});
auto sval = peekSite(_A[p],coor);
}
// Only needed if nonhermitian
if ( ! hermitian ) {
std::cout << GridLogMessage<<"PopulateAdag "<<std::endl;
PopulateAdag();
}
// NB: PopulateAdag above fills Adag from A in the non-hermitian case; hermitian Mdag reuses M
ExchangeCoarseLinks();
std::cout << GridLogMessage<<"CoarsenOperator eigen "<<teigen<<" us"<<std::endl;
std::cout << GridLogMessage<<"CoarsenOperator phase "<<tphase<<" us"<<std::endl;
std::cout << GridLogMessage<<"CoarsenOperator mat "<<tmat <<" us"<<std::endl;
std::cout << GridLogMessage<<"CoarsenOperator proj "<<tproj<<" us"<<std::endl;
std::cout << GridLogMessage<<"CoarsenOperator inv "<<tinv<<" us"<<std::endl;
}
void ExchangeCoarseLinks(void){
for(int p=0;p<geom.npoint;p++){
_A[p] = Cell.Exchange(_A[p]);
_Adag[p]= Cell.Exchange(_Adag[p]);
}
}
virtual void Mdiag (const Field &in, Field &out){ assert(0);};
virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);};
virtual void MdirAll (const Field &in, std::vector<Field> &out){assert(0);};
};
NAMESPACE_END(Grid);
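A usage sketch of the coarse operator (FineGrid, CoarseGrid, HermOp and an Aggregation object Aggregates as in the previous header are assumed; names are illustrative):

  // Sketch: coarsen a fine operator and apply one coarse matrix-vector multiply
  NextToNearestStencilGeometry4D geom(CoarseGrid);        // 2-hop coarse stencil, 33 points
  typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOp;
  LittleDiracOp CoarseOp(geom,FineGrid,CoarseGrid);
  CoarseOp.CoarsenOperator(HermOp,Aggregates);            // solve for the A_ij couplings

  LittleDiracOp::CoarseVector c_src(CoarseGrid), c_res(CoarseGrid);
  GridParallelRNG CRNG(CoarseGrid); CRNG.SeedFixedIntegers(std::vector<int>({5,6,7,8}));
  gaussian(CRNG,c_src);
  CoarseOp.M(c_src,c_res);                                // uses the halo-exchanged _A links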


@ -0,0 +1,243 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/GeneralCoarsenedMatrix.h
Copyright (C) 2015
Author: Peter Boyle <pboyle@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////////////
// Geometry class in cartesian case
/////////////////////////////////////////////////////////////////
class Geometry {
public:
int npoint;
int base;
std::vector<int> directions ;
std::vector<int> displacements;
std::vector<int> points_dagger;
Geometry(int _d) {
base = (_d==5) ? 1:0;
// make coarse grid stencil for 4d , not 5d
if ( _d==5 ) _d=4;
npoint = 2*_d+1;
directions.resize(npoint);
displacements.resize(npoint);
points_dagger.resize(npoint);
for(int d=0;d<_d;d++){
directions[d ] = d+base;
directions[d+_d] = d+base;
displacements[d ] = +1;
displacements[d+_d]= -1;
points_dagger[d ] = d+_d;
points_dagger[d+_d] = d;
}
directions [2*_d]=0;
displacements[2*_d]=0;
points_dagger[2*_d]=2*_d;
}
int point(int dir, int disp) {
assert(disp == -1 || disp == 0 || disp == 1);
assert(base+0 <= dir && dir < base+4);
// directions faster index = new indexing
// 4d (base = 0):
// point 0 1 2 3 4 5 6 7 8
// dir 0 1 2 3 0 1 2 3 0
// disp +1 +1 +1 +1 -1 -1 -1 -1 0
// 5d (base = 1):
// point 0 1 2 3 4 5 6 7 8
// dir 1 2 3 4 1 2 3 4 0
// disp +1 +1 +1 +1 -1 -1 -1 -1 0
// displacements faster index = old indexing
// 4d (base = 0):
// point 0 1 2 3 4 5 6 7 8
// dir 0 0 1 1 2 2 3 3 0
// disp +1 -1 +1 -1 +1 -1 +1 -1 0
// 5d (base = 1):
// point 0 1 2 3 4 5 6 7 8
// dir 1 1 2 2 3 3 4 4 0
// disp +1 -1 +1 -1 +1 -1 +1 -1 0
if(dir == 0 and disp == 0)
return 8;
else // New indexing
return (1 - disp) / 2 * 4 + dir - base;
// else // Old indexing
// return (4 * (dir - base) + 1 - disp) / 2;
}
};
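A quick check of the "new indexing" convention documented above (4d, base = 0):

  Geometry g(4);
  assert( g.point(2,+1) == 2 );   // forward hop in dir 2 : point == dir
  assert( g.point(2,-1) == 6 );   // backward hop         : point == 4 + dir
  assert( g.point(0, 0) == 8 );   // the diagonal / self point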
/////////////////////////////////////////////////////////////////
// A less local equivalent of the Geometry class in the cartesian case
/////////////////////////////////////////////////////////////////
class NonLocalStencilGeometry {
public:
int depth;
int hops;
int npoint;
std::vector<Coordinate> shifts;
Coordinate stencil_size;
Coordinate stencil_lo;
Coordinate stencil_hi;
GridCartesian *grid;
GridCartesian *Grid() {return grid;};
int Depth(void){return 1;}; // Ghost zone depth
int Hops(void){return hops;}; // # of hops => level of corner fill-in in the stencil
virtual int DimSkip(void) =0;
virtual ~NonLocalStencilGeometry() {};
int Reverse(int point)
{
int Nd = Grid()->Nd();
Coordinate shft = shifts[point];
Coordinate rev(Nd);
for(int mu=0;mu<Nd;mu++) rev[mu]= -shft[mu];
for(int p=0;p<npoint;p++){
if(rev==shifts[p]){
return p;
}
}
assert(0);
return -1;
}
void BuildShifts(void)
{
this->shifts.resize(0);
int Nd = this->grid->Nd();
int dd = this->DimSkip();
for(int s0=this->stencil_lo[dd+0];s0<=this->stencil_hi[dd+0];s0++){
for(int s1=this->stencil_lo[dd+1];s1<=this->stencil_hi[dd+1];s1++){
for(int s2=this->stencil_lo[dd+2];s2<=this->stencil_hi[dd+2];s2++){
for(int s3=this->stencil_lo[dd+3];s3<=this->stencil_hi[dd+3];s3++){
Coordinate sft(Nd,0);
sft[dd+0] = s0;
sft[dd+1] = s1;
sft[dd+2] = s2;
sft[dd+3] = s3;
int nhops = abs(s0)+abs(s1)+abs(s2)+abs(s3);
if(nhops<=this->hops) this->shifts.push_back(sft);
}}}}
this->npoint = this->shifts.size();
std::cout << GridLogMessage << "NonLocalStencilGeometry has "<< this->npoint << " terms in stencil "<<std::endl;
}
NonLocalStencilGeometry(GridCartesian *_coarse_grid,int _hops) : grid(_coarse_grid), hops(_hops)
{
Coordinate latt = grid->GlobalDimensions();
stencil_size.resize(grid->Nd());
stencil_lo.resize(grid->Nd());
stencil_hi.resize(grid->Nd());
for(int d=0;d<grid->Nd();d++){
if ( latt[d] == 1 ) {
stencil_lo[d] = 0;
stencil_hi[d] = 0;
stencil_size[d]= 1;
} else if ( latt[d] == 2 ) {
stencil_lo[d] = -1;
stencil_hi[d] = 0;
stencil_size[d]= 2;
} else if ( latt[d] > 2 ) {
stencil_lo[d] = -1;
stencil_hi[d] = 1;
stencil_size[d]= 3;
}
}
};
};
// Need to worry about red-black now
class NonLocalStencilGeometry4D : public NonLocalStencilGeometry {
public:
virtual int DimSkip(void) { return 0;};
NonLocalStencilGeometry4D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { };
virtual ~NonLocalStencilGeometry4D() {};
};
class NonLocalStencilGeometry5D : public NonLocalStencilGeometry {
public:
virtual int DimSkip(void) { return 1; };
NonLocalStencilGeometry5D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { };
virtual ~NonLocalStencilGeometry5D() {};
};
/*
* Bunch of different options classes
*/
class NextToNextToNextToNearestStencilGeometry4D : public NonLocalStencilGeometry4D {
public:
NextToNextToNextToNearestStencilGeometry4D(GridCartesian *Coarse) : NonLocalStencilGeometry4D(Coarse,4)
{
this->BuildShifts();
};
};
class NextToNextToNextToNearestStencilGeometry5D : public NonLocalStencilGeometry5D {
public:
NextToNextToNextToNearestStencilGeometry5D(GridCartesian *Coarse) : NonLocalStencilGeometry5D(Coarse,4)
{
this->BuildShifts();
};
};
class NextToNearestStencilGeometry4D : public NonLocalStencilGeometry4D {
public:
NextToNearestStencilGeometry4D(GridCartesian *Coarse) : NonLocalStencilGeometry4D(Coarse,2)
{
this->BuildShifts();
};
};
class NextToNearestStencilGeometry5D : public NonLocalStencilGeometry5D {
public:
NextToNearestStencilGeometry5D(GridCartesian *Coarse) : NonLocalStencilGeometry5D(Coarse,2)
{
this->BuildShifts();
};
};
class NearestStencilGeometry4D : public NonLocalStencilGeometry4D {
public:
NearestStencilGeometry4D(GridCartesian *Coarse) : NonLocalStencilGeometry4D(Coarse,1)
{
this->BuildShifts();
};
};
class NearestStencilGeometry5D : public NonLocalStencilGeometry5D {
public:
NearestStencilGeometry5D(GridCartesian *Coarse) : NonLocalStencilGeometry5D(Coarse,1)
{
this->BuildShifts();
};
};
NAMESPACE_END(Grid);
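What the option classes enumerate, assuming a 4d coarse grid with all dimensions > 2 so each dimension contributes shifts {-1,0,+1} (a sketch; only npoint differs between them):

  // hops=1 : |s| <= 1 ball,  2*4+1  =  9 points (nearest neighbour)
  // hops=2 : |s| <= 2 ball,  9 + 24 = 33 points (adds the two-hop corners)
  // hops=4 : the whole 3^4 ball     = 81 points
  NearestStencilGeometry4D                   near(CoarseGrid);  // near.npoint == 9
  NextToNearestStencilGeometry4D             ntn (CoarseGrid);  // ntn.npoint  == 33
  NextToNextToNextToNearestStencilGeometry4D full(CoarseGrid);  // full.npoint == 81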


@ -0,0 +1,33 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Grid/algorithms/multigrid/MultiGrid.h
Copyright (C) 2023
Author: Peter Boyle <pboyle@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/algorithms/multigrid/Aggregates.h>
#include <Grid/algorithms/multigrid/Geometry.h>
#include <Grid/algorithms/multigrid/CoarsenedMatrix.h>
#include <Grid/algorithms/multigrid/GeneralCoarsenedMatrix.h>


@ -209,9 +209,9 @@ private:
static void CpuViewClose(uint64_t Ptr);
static uint64_t CpuViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint);
#endif
static void NotifyDeletion(void * CpuPtr);
public:
static void NotifyDeletion(void * CpuPtr);
static void Print(void);
static void PrintAll(void);
static void PrintState( void* CpuPtr);


@ -8,7 +8,7 @@ NAMESPACE_BEGIN(Grid);
static char print_buffer [ MAXLINE ];
#define mprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogMemory << print_buffer;
#define dprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogMemory << print_buffer;
#define dprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogDebug << print_buffer;
//#define dprintf(...)
@ -111,7 +111,7 @@ void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
///////////////////////////////////////////////////////////
assert(AccCache.state!=Empty);
mprintf("MemoryManager: Discard(%lx) %lx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
dprintf("MemoryManager: Discard(%lx) %lx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
assert(AccCache.accLock==0);
assert(AccCache.cpuLock==0);
assert(AccCache.CpuPtr!=(uint64_t)NULL);
@ -141,7 +141,7 @@ void MemoryManager::Evict(AcceleratorViewEntry &AccCache)
///////////////////////////////////////////////////////////////////////////
assert(AccCache.state!=Empty);
mprintf("MemoryManager: Evict cpu %lx acc %lx cpuLock %ld accLock %ld\n",
mprintf("MemoryManager: Evict CpuPtr %lx AccPtr %lx cpuLock %ld accLock %ld\n",
(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr,
(uint64_t)AccCache.cpuLock,(uint64_t)AccCache.accLock);
if (AccCache.accLock!=0) return;
@ -155,7 +155,7 @@ void MemoryManager::Evict(AcceleratorViewEntry &AccCache)
AccCache.AccPtr=(uint64_t)NULL;
AccCache.state=CpuDirty; // CPU primary now
DeviceBytes -=AccCache.bytes;
dprintf("MemoryManager: Free(%lx) footprint now %ld \n",(uint64_t)AccCache.AccPtr,DeviceBytes);
dprintf("MemoryManager: Free(AccPtr %lx) footprint now %ld \n",(uint64_t)AccCache.AccPtr,DeviceBytes);
}
// uint64_t CpuPtr = AccCache.CpuPtr;
DeviceEvictions++;
@ -169,7 +169,7 @@ void MemoryManager::Flush(AcceleratorViewEntry &AccCache)
assert(AccCache.AccPtr!=(uint64_t)NULL);
assert(AccCache.CpuPtr!=(uint64_t)NULL);
acceleratorCopyFromDevice((void *)AccCache.AccPtr,(void *)AccCache.CpuPtr,AccCache.bytes);
mprintf("MemoryManager: Flush %lx -> %lx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
mprintf("MemoryManager: acceleratorCopyFromDevice Flush AccPtr %lx -> CpuPtr %lx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
DeviceToHostBytes+=AccCache.bytes;
DeviceToHostXfer++;
AccCache.state=Consistent;
@ -184,7 +184,7 @@ void MemoryManager::Clone(AcceleratorViewEntry &AccCache)
AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes);
DeviceBytes+=AccCache.bytes;
}
mprintf("MemoryManager: Clone %lx <- %lx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
mprintf("MemoryManager: acceleratorCopyToDevice Clone AccPtr %lx <- CpuPtr %lx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
acceleratorCopyToDevice((void *)AccCache.CpuPtr,(void *)AccCache.AccPtr,AccCache.bytes);
HostToDeviceBytes+=AccCache.bytes;
HostToDeviceXfer++;
@ -519,7 +519,6 @@ void MemoryManager::Audit(std::string s)
uint64_t LruBytes1=0;
uint64_t LruBytes2=0;
uint64_t LruCnt=0;
uint64_t LockedBytes=0;
std::cout << " Memory Manager::Audit() from "<<s<<std::endl;
for(auto it=LRU.begin();it!=LRU.end();it++){


@ -70,8 +70,8 @@ public:
Coordinate _istride; // Inner stride i.e. within simd lane
int _osites; // _isites*_osites = product(dimensions).
int _isites;
int _fsites; // _isites*_osites = product(dimensions).
int _gsites;
int64_t _fsites; // _isites*_osites = product(dimensions).
int64_t _gsites;
Coordinate _slice_block;// subslice information
Coordinate _slice_stride;
Coordinate _slice_nblock;
@ -183,7 +183,7 @@ public:
inline int Nsimd(void) const { return _isites; };// Synonymous with iSites
inline int oSites(void) const { return _osites; };
inline int lSites(void) const { return _isites*_osites; };
inline int gSites(void) const { return _isites*_osites*_Nprocessors; };
inline int64_t gSites(void) const { return (int64_t)_isites*(int64_t)_osites*(int64_t)_Nprocessors; };
inline int Nd (void) const { return _ndimension;};
inline const Coordinate LocalStarts(void) { return _lstart; };
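The widening to int64_t matters once the global volume passes 2^31-1 sites; a quick worked check:

  // 16 * 64^4 = 268,435,456   -- a 64^4, Ls=16 volume still fits in int32
  // 256^4     = 4,294,967,296 -- overflows int32, hence the int64_t _gsites
  static_assert( 256LL*256LL*256LL*256LL > 2147483647LL , "global site count exceeds int32");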
@ -214,7 +214,7 @@ public:
////////////////////////////////////////////////////////////////
// Global addressing
////////////////////////////////////////////////////////////////
void GlobalIndexToGlobalCoor(int gidx,Coordinate &gcoor){
void GlobalIndexToGlobalCoor(int64_t gidx,Coordinate &gcoor){
assert(gidx< gSites());
Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions);
}
@ -222,7 +222,7 @@ public:
assert(lidx<lSites());
Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
}
void GlobalCoorToGlobalIndex(const Coordinate & gcoor,int & gidx){
void GlobalCoorToGlobalIndex(const Coordinate & gcoor,int64_t & gidx){
gidx=0;
int mult=1;
for(int mu=0;mu<_ndimension;mu++) {


@ -138,6 +138,14 @@ public:
////////////////////////////////////////////////////////////
// Face exchange, buffer swap in translational invariant way
////////////////////////////////////////////////////////////
void CommsComplete(std::vector<CommsRequest_t> &list);
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes,int dir);
void SendToRecvFrom(void *xmit,
int xmit_to_rank,
void *recv,


@ -306,6 +306,44 @@ void CartesianCommunicator::GlobalSumVector(double *d,int N)
int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes,int dir)
{
MPI_Request xrq;
MPI_Request rrq;
assert(dest != _processor);
assert(from != _processor);
int tag;
tag= dir+from*32;
int ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator,&rrq);
assert(ierr==0);
list.push_back(rrq);
tag= dir+_processor*32;
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator,&xrq);
assert(ierr==0);
list.push_back(xrq);
}
void CartesianCommunicator::CommsComplete(std::vector<CommsRequest_t> &list)
{
int nreq=list.size();
if (nreq==0) return;
std::vector<MPI_Status> status(nreq);
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
assert(ierr==0);
list.resize(0);
}
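The two methods pair up as a split-phase exchange; the intended call pattern is (a sketch, where comm is a CartesianCommunicator and the buffers, ranks and direction are illustrative):

  // Sketch: post a non-blocking send/recv pair per direction, overlap, then complete
  std::vector<CartesianCommunicator::CommsRequest_t> reqs;
  comm.SendToRecvFromBegin(reqs, xmit_buf, dest_rank,
                                 recv_buf, from_rank, bytes, mu /*dir*/);
  // ... overlap: assemble the next face buffer or run interior compute here ...
  comm.CommsComplete(reqs);      // MPI_Waitall over every posted request; clears the list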
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFrom(void *xmit,
int dest,


@ -91,6 +91,17 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
{
assert(0);
}
void CartesianCommunicator::CommsComplete(std::vector<CommsRequest_t> &list){ assert(0);}
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes,int dir)
{
assert(0);
}
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes)
{
bcopy(in,out,bytes*words);


@ -27,9 +27,10 @@ Author: Christoph Lehner <christoph@lhnr.de>
*************************************************************************************/
/* END LEGAL */
#define Mheader "SharedMemoryMpi: "
#include <Grid/GridCore.h>
#include <pwd.h>
#include <syscall.h>
#ifdef GRID_CUDA
#include <cuda_runtime_api.h>
@ -39,11 +40,118 @@ Author: Christoph Lehner <christoph@lhnr.de>
#endif
#ifdef GRID_SYCL
#define GRID_SYCL_LEVEL_ZERO_IPC
#include <syscall.h>
#define SHM_SOCKETS
#endif
#include <sys/socket.h>
#include <sys/un.h>
NAMESPACE_BEGIN(Grid);
#ifdef SHM_SOCKETS
/*
* Barbaric extra intranode communication route in case we need sockets to pass FDs
* Forced by level_zero not being nicely designed
*/
static int sock;
static const char *sock_path_fmt = "/tmp/GridUnixSocket.%d";
static char sock_path[256];
class UnixSockets {
public:
static void Open(int rank)
{
int errnum;
sock = socket(AF_UNIX, SOCK_DGRAM, 0); assert(sock>0);
struct sockaddr_un sa_un = { 0 };
sa_un.sun_family = AF_UNIX;
snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,rank);
unlink(sa_un.sun_path);
if (bind(sock, (struct sockaddr *)&sa_un, sizeof(sa_un))) {
perror("bind failure");
exit(EXIT_FAILURE);
}
}
static int RecvFileDescriptor(void)
{
int n;
int fd;
char buf[1];
struct iovec iov;
struct msghdr msg;
struct cmsghdr *cmsg;
char cms[CMSG_SPACE(sizeof(int))];
iov.iov_base = buf;
iov.iov_len = 1;
memset(&msg, 0, sizeof msg);
msg.msg_name = 0;
msg.msg_namelen = 0;
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_control = (caddr_t)cms;
msg.msg_controllen = sizeof cms;
if((n=recvmsg(sock, &msg, 0)) < 0) {
perror("recvmsg failed");
return -1;
}
if(n == 0){
perror("recvmsg returned 0");
return -1;
}
cmsg = CMSG_FIRSTHDR(&msg);
memmove(&fd, CMSG_DATA(cmsg), sizeof(int));
return fd;
}
static void SendFileDescriptor(int fildes,int xmit_to_rank)
{
struct msghdr msg;
struct iovec iov;
struct cmsghdr *cmsg = NULL;
char ctrl[CMSG_SPACE(sizeof(int))];
char data = ' ';
memset(&msg, 0, sizeof(struct msghdr));
memset(ctrl, 0, CMSG_SPACE(sizeof(int)));
iov.iov_base = &data;
iov.iov_len = sizeof(data);
sprintf(sock_path,sock_path_fmt,xmit_to_rank);
struct sockaddr_un sa_un = { 0 };
sa_un.sun_family = AF_UNIX;
snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,xmit_to_rank);
msg.msg_name = (void *)&sa_un;
msg.msg_namelen = sizeof(sa_un);
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_controllen = CMSG_SPACE(sizeof(int));
msg.msg_control = ctrl;
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
*((int *) CMSG_DATA(cmsg)) = fildes;
sendmsg(sock, &msg, 0);
};
};
#endif
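The handshake these helpers implement, matching the allocation loop further down (a sketch; every rank first binds its own socket, then the owner of the buffer pushes its fd to each peer):

  UnixSockets::Open(WorldShmRank);                 // bind /tmp/GridUnixSocket.<rank>
  if ( r == WorldShmRank ) {                       // owner: send the fd to every peer
    for(int rr=0;rr<WorldShmSize;rr++)
      if (rr!=r) UnixSockets::SendFileDescriptor(handle.fd,rr);
  } else {
    int myfd = UnixSockets::RecvFileDescriptor();  // peer: receive the cloned fd
  }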
NAMESPACE_BEGIN(Grid);
#define header "SharedMemoryMpi: "
/*Construct from an MPI communicator*/
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
{
@ -66,8 +174,8 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
MPI_Comm_size(WorldShmComm ,&WorldShmSize);
if ( WorldRank == 0) {
std::cout << header " World communicator of size " <<WorldSize << std::endl;
std::cout << header " Node communicator of size " <<WorldShmSize << std::endl;
std::cout << Mheader " World communicator of size " <<WorldSize << std::endl;
std::cout << Mheader " Node communicator of size " <<WorldShmSize << std::endl;
}
// WorldShmComm, WorldShmSize, WorldShmRank
@ -170,10 +278,7 @@ void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_M
if(nscan==3 && HPEhypercube ) OptimalCommunicatorHypercube(processors,optimal_comm,SHM);
else OptimalCommunicatorSharedMemory(processors,optimal_comm,SHM);
}
static inline int divides(int a,int b)
{
return ( b == ( (b/a)*a ) );
}
void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &SHM)
{
////////////////////////////////////////////////////////////////
@ -347,7 +452,7 @@ void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const Coordinate &proce
#ifdef GRID_MPI3_SHMGET
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << header "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
@ -432,7 +537,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
exit(EXIT_FAILURE);
}
std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
std::cout << WorldRank << Mheader " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
<< "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
SharedMemoryZero(ShmCommBuf,bytes);
@ -475,7 +580,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
exit(EXIT_FAILURE);
}
if ( WorldRank == 0 ){
std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
std::cout << WorldRank << Mheader " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
<< "bytes at "<< std::hex<< ShmCommBuf << " - "<<(bytes-1+(uint64_t)ShmCommBuf) <<std::dec<<" for comms buffers " <<std::endl;
}
SharedMemoryZero(ShmCommBuf,bytes);
@ -483,8 +588,13 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Loop over ranks/gpu's on our node
///////////////////////////////////////////////////////////////////////////////////////////////////////////
#ifdef SHM_SOCKETS
UnixSockets::Open(WorldShmRank);
#endif
for(int r=0;r<WorldShmSize;r++){
MPI_Barrier(WorldShmComm);
#ifndef GRID_MPI3_SHM_NONE
//////////////////////////////////////////////////
// If it is me, pass around the IPC access key
@ -492,10 +602,10 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
void * thisBuf = ShmCommBuf;
if(!Stencil_force_mpi) {
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
typedef struct { int fd; pid_t pid ; } clone_mem_t;
typedef struct { int fd; pid_t pid ; ze_ipc_mem_handle_t ze; } clone_mem_t;
auto zeDevice = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_device());
auto zeContext = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_context());
auto zeDevice = cl::sycl::get_native<cl::sycl::backend::ext_oneapi_level_zero>(theGridAccelerator->get_device());
auto zeContext = cl::sycl::get_native<cl::sycl::backend::ext_oneapi_level_zero>(theGridAccelerator->get_context());
ze_ipc_mem_handle_t ihandle;
clone_mem_t handle;
@ -503,13 +613,21 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
if ( r==WorldShmRank ) {
auto err = zeMemGetIpcHandle(zeContext,ShmCommBuf,&ihandle);
if ( err != ZE_RESULT_SUCCESS ) {
std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
std::cerr << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
exit(EXIT_FAILURE);
} else {
std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle succeeded for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
}
memcpy((void *)&handle.fd,(void *)&ihandle,sizeof(int));
handle.pid = getpid();
memcpy((void *)&handle.ze,(void *)&ihandle,sizeof(ihandle));
#ifdef SHM_SOCKETS
for(int rr=0;rr<WorldShmSize;rr++){
if(rr!=r){
UnixSockets::SendFileDescriptor(handle.fd,rr);
}
}
#endif
}
#endif
#ifdef GRID_CUDA
@ -537,6 +655,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
// Share this IPC handle across the Shm Comm
//////////////////////////////////////////////////
{
MPI_Barrier(WorldShmComm);
int ierr=MPI_Bcast(&handle,
sizeof(handle),
MPI_BYTE,
@ -552,6 +671,10 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
if ( r!=WorldShmRank ) {
thisBuf = nullptr;
int myfd;
#ifdef SHM_SOCKETS
myfd=UnixSockets::RecvFileDescriptor();
#else
std::cout<<"mapping seeking remote pid/fd "
<<handle.pid<<"/"
<<handle.fd<<std::endl;
@ -559,16 +682,22 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
int pidfd = syscall(SYS_pidfd_open,handle.pid,0);
std::cout<<"Using IpcHandle pidfd "<<pidfd<<"\n";
// int myfd = syscall(SYS_pidfd_getfd,pidfd,handle.fd,0);
int myfd = syscall(438,pidfd,handle.fd,0);
std::cout<<"Using IpcHandle myfd "<<myfd<<"\n";
myfd = syscall(438,pidfd,handle.fd,0);
int err_t = errno;
if (myfd < 0) {
fprintf(stderr,"pidfd_getfd returned %d errno was %d\n", myfd,err_t); fflush(stderr);
perror("pidfd_getfd failed ");
assert(0);
}
#endif
std::cout<<"Using IpcHandle mapped remote pid "<<handle.pid <<" FD "<<handle.fd <<" to myfd "<<myfd<<"\n";
memcpy((void *)&ihandle,(void *)&handle.ze,sizeof(ihandle));
memcpy((void *)&ihandle,(void *)&myfd,sizeof(int));
auto err = zeMemOpenIpcHandle(zeContext,zeDevice,ihandle,0,&thisBuf);
if ( err != ZE_RESULT_SUCCESS ) {
std::cout << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl;
std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
std::cerr << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl;
std::cerr << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
exit(EXIT_FAILURE);
} else {
std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle succeeded for rank "<<r<<std::endl;
@ -603,6 +732,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#else
WorldShmCommBufs[r] = ShmCommBuf;
#endif
MPI_Barrier(WorldShmComm);
}
_ShmAllocBytes=bytes;
@ -614,7 +744,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#ifdef GRID_MPI3_SHMMMAP
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << header "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -651,7 +781,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
assert(((uint64_t)ptr&0x3F)==0);
close(fd);
WorldShmCommBufs[r] =ptr;
// std::cout << header "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
// std::cout << Mheader "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
}
_ShmAlloc=1;
_ShmAllocBytes = bytes;
@ -661,7 +791,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#ifdef GRID_MPI3_SHM_NONE
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << header "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -708,7 +838,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
////////////////////////////////////////////////////////////////////////////////////////////
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << header "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
MPI_Barrier(WorldShmComm);


@ -47,3 +47,4 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/lattice/Lattice_transfer.h>
#include <Grid/lattice/Lattice_basis.h>
#include <Grid/lattice/Lattice_crc.h>
#include <Grid/lattice/PaddedCell.h>


@ -345,7 +345,9 @@ GridUnopClass(UnaryNot, Not(a));
GridUnopClass(UnaryTrace, trace(a));
GridUnopClass(UnaryTranspose, transpose(a));
GridUnopClass(UnaryTa, Ta(a));
GridUnopClass(UnarySpTa, SpTa(a));
GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a));
GridUnopClass(UnaryProjectOnSpGroup, ProjectOnSpGroup(a));
GridUnopClass(UnaryTimesI, timesI(a));
GridUnopClass(UnaryTimesMinusI, timesMinusI(a));
GridUnopClass(UnaryAbs, abs(a));
@ -456,7 +458,9 @@ GRID_DEF_UNOP(operator!, UnaryNot);
GRID_DEF_UNOP(trace, UnaryTrace);
GRID_DEF_UNOP(transpose, UnaryTranspose);
GRID_DEF_UNOP(Ta, UnaryTa);
GRID_DEF_UNOP(SpTa, UnarySpTa);
GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup);
GRID_DEF_UNOP(ProjectOnSpGroup, UnaryProjectOnSpGroup);
GRID_DEF_UNOP(timesI, UnaryTimesI);
GRID_DEF_UNOP(timesMinusI, UnaryTimesMinusI);
GRID_DEF_UNOP(abs, UnaryAbs); // abs overloaded in cmath C++98; DON'T do the
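The two new unops follow the existing expression-template pattern and apply site-by-site on whole lattices; a sketch (assuming a matrix-valued field U on some grid):

  LatticeColourMatrix U(grid);
  LatticeColourMatrix A = SpTa(U);              // traceless anti-hermitian part, symplectic algebra
  LatticeColourMatrix V = ProjectOnSpGroup(U);  // project back onto the symplectic group manifold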


@ -360,7 +360,7 @@ public:
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
typedef typename vobj::scalar_object sobj;
for(int g=0;g<o.Grid()->_gsites;g++){
for(int64_t g=0;g<o.Grid()->_gsites;g++){
Coordinate gcoor;
o.Grid()->GlobalIndexToGlobalCoor(g,gcoor);


@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid);
template<class vobj> void DumpSliceNorm(std::string s,Lattice<vobj> &f,int mu=-1)
template<class vobj> void DumpSliceNorm(std::string s,const Lattice<vobj> &f,int mu=-1)
{
auto ff = localNorm2(f);
if ( mu==-1 ) mu = f.Grid()->Nd()-1;


@ -361,9 +361,14 @@ public:
_bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1});
_uid.resize(_vol,std::uniform_int_distribution<uint32_t>() );
}
template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,std::vector<distribution> &dist){
template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,std::vector<distribution> &dist)
{
if ( l.Grid()->_isCheckerBoarded ) {
Lattice<vobj> tmp(_grid);
fill(tmp,dist);
pickCheckerboard(l.Checkerboard(),l,tmp);
return;
}
typedef typename vobj::scalar_object scalar_object;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
@ -427,7 +432,7 @@ public:
#if 1
thread_for( lidx, _grid->lSites(), {
int gidx;
int64_t gidx;
int o_idx;
int i_idx;
int rank;


@ -66,6 +66,65 @@ inline auto TraceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<
return ret;
};
template<int N, class Vec>
Lattice<iScalar<iScalar<iScalar<Vec> > > > Determinant(const Lattice<iScalar<iScalar<iMatrix<Vec, N> > > > &Umu)
{
GridBase *grid=Umu.Grid();
auto lvol = grid->lSites();
Lattice<iScalar<iScalar<iScalar<Vec> > > > ret(grid);
typedef typename Vec::scalar_type scalar;
autoView(Umu_v,Umu,CpuRead);
autoView(ret_v,ret,CpuWrite);
thread_for(site,lvol,{
Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N);
Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor);
iScalar<iScalar<iMatrix<scalar, N> > > Us;
peekLocalSite(Us, Umu_v, lcoor);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
scalar tmp= Us()()(i,j);
ComplexD ztmp(real(tmp),imag(tmp));
EigenU(i,j)=ztmp;
}}
ComplexD detD = EigenU.determinant();
typename Vec::scalar_type det(detD.real(),detD.imag());
pokeLocalSite(det,ret_v,lcoor);
});
return ret;
}
template<int N>
Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > Inverse(const Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu)
{
GridBase *grid=Umu.Grid();
auto lvol = grid->lSites();
Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > ret(grid);
autoView(Umu_v,Umu,CpuRead);
autoView(ret_v,ret,CpuWrite);
thread_for(site,lvol,{
Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N);
Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor);
iScalar<iScalar<iMatrix<ComplexD, N> > > Us;
iScalar<iScalar<iMatrix<ComplexD, N> > > Ui;
peekLocalSite(Us, Umu_v, lcoor);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
EigenU(i,j) = Us()()(i,j);
}}
Eigen::MatrixXcd EigenUinv = EigenU.inverse();
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
Ui()()(i,j) = EigenUinv(i,j);
}}
pokeLocalSite(Ui,ret_v,lcoor);
});
return ret;
}
NAMESPACE_END(Grid);
#endif
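Both helpers run site-local Eigen linear algebra on CPU views; a usage sketch (assuming a double-precision matrix field):

  LatticeColourMatrixD U(grid);    // iScalar<iScalar<iMatrix<vComplexD,Nc>>>
  auto detU = Determinant(U);      // lattice of per-site complex scalars
  auto Uinv = Inverse(U);          // per-site matrix inverse; U*Uinv ~ identity up to rounding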


@ -471,13 +471,13 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
vobj zz = Zero();
accelerator_for(sc,coarse->oSites(),1,{
accelerator_for(sc,coarse->oSites(),vobj::Nsimd(),{
// One thread per sub block
Coordinate coor_c(_ndimension);
Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate
vobj cd = zz;
auto cd = coalescedRead(zz);
for(int sb=0;sb<blockVol;sb++){
@ -488,10 +488,10 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d];
Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions);
cd=cd+fineData_p[sf];
cd=cd+coalescedRead(fineData_p[sf]);
}
coarseData_p[sc] = cd;
coalescedWrite(coarseData_p[sc],cd);
});
return;
@ -697,8 +697,68 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
for(int d=0;d<nd;d++){
assert(Fg->_processors[d] == Tg->_processors[d]);
}
// the above should guarantee that the operations are local
#if 1
size_t nsite = 1;
for(int i=0;i<nd;i++) nsite *= RegionSize[i];
size_t tbytes = 4*nsite*sizeof(int);
int *table = (int*)malloc(tbytes);
thread_for(idx, nsite, {
Coordinate from_coor, to_coor;
size_t rem = idx;
for(int i=0;i<nd;i++){
size_t base_i = rem % RegionSize[i]; rem /= RegionSize[i];
from_coor[i] = base_i + FromLowerLeft[i];
to_coor[i] = base_i + ToLowerLeft[i];
}
int foidx = Fg->oIndex(from_coor);
int fiidx = Fg->iIndex(from_coor);
int toidx = Tg->oIndex(to_coor);
int tiidx = Tg->iIndex(to_coor);
int* tt = table + 4*idx;
tt[0] = foidx;
tt[1] = fiidx;
tt[2] = toidx;
tt[3] = tiidx;
});
int* table_d = (int*)acceleratorAllocDevice(tbytes);
acceleratorCopyToDevice(table,table_d,tbytes);
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
autoView(from_v,From,AcceleratorRead);
autoView(to_v,To,AcceleratorWrite);
accelerator_for(idx,nsite,1,{
static const int words=sizeof(vobj)/sizeof(vector_type);
int* tt = table_d + 4*idx;
int from_oidx = *tt++;
int from_lane = *tt++;
int to_oidx = *tt++;
int to_lane = *tt;
const vector_type* from = (const vector_type *)&from_v[from_oidx];
vector_type* to = (vector_type *)&to_v[to_oidx];
scalar_type stmp;
for(int w=0;w<words;w++){
stmp = getlane(from[w], from_lane);
putlane(to[w], stmp, to_lane);
}
});
acceleratorFreeDevice(table_d);
free(table);
#else
Coordinate ldf = Fg->_ldimensions;
Coordinate rdf = Fg->_rdimensions;
Coordinate isf = Fg->_istride;
@ -707,9 +767,9 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
Coordinate ist = Tg->_istride;
Coordinate ost = Tg->_ostride;
autoView( t_v , To, AcceleratorWrite);
autoView( f_v , From, AcceleratorRead);
accelerator_for(idx,Fg->lSites(),1,{
autoView( t_v , To, CpuWrite);
autoView( f_v , From, CpuRead);
thread_for(idx,Fg->lSites(),{
sobj s;
Coordinate Fcoor(nd);
Coordinate Tcoor(nd);
@ -722,17 +782,24 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
Tcoor[d] = ToLowerLeft[d]+ Fcoor[d]-FromLowerLeft[d];
}
if (in_region) {
Integer idx_f = 0; for(int d=0;d<nd;d++) idx_f+=isf[d]*(Fcoor[d]/rdf[d]);
Integer idx_t = 0; for(int d=0;d<nd;d++) idx_t+=ist[d]*(Tcoor[d]/rdt[d]);
Integer odx_f = 0; for(int d=0;d<nd;d++) odx_f+=osf[d]*(Fcoor[d]%rdf[d]);
Integer odx_t = 0; for(int d=0;d<nd;d++) odx_t+=ost[d]*(Tcoor[d]%rdt[d]);
vector_type * fp = (vector_type *)&f_v[odx_f];
vector_type * tp = (vector_type *)&t_v[odx_t];
#if 0
Integer idx_f = 0; for(int d=0;d<nd;d++) idx_f+=isf[d]*(Fcoor[d]/rdf[d]); // inner index from
Integer idx_t = 0; for(int d=0;d<nd;d++) idx_t+=ist[d]*(Tcoor[d]/rdt[d]); // inner index to
Integer odx_f = 0; for(int d=0;d<nd;d++) odx_f+=osf[d]*(Fcoor[d]%rdf[d]); // outer index from
Integer odx_t = 0; for(int d=0;d<nd;d++) odx_t+=ost[d]*(Tcoor[d]%rdt[d]); // outer index to
scalar_type * fp = (scalar_type *)&f_v[odx_f];
scalar_type * tp = (scalar_type *)&t_v[odx_t];
for(int w=0;w<words;w++){
tp[w].putlane(fp[w].getlane(idx_f),idx_t);
}
#else
peekLocalSite(s,f_v,Fcoor);
pokeLocalSite(s,t_v,Tcoor);
#endif
}
});
#endif
}
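Two notes on the function above: the #if 1 path precomputes, on the host, four ints per transferred site -- (from_oidx, from_lane, to_oidx, to_lane) -- so the device kernel does no coordinate arithmetic at all, only word-by-word lane moves through getlane/putlane; the #else branch keeps the older peekLocalSite/pokeLocalSite CPU walk as a fallback. The table idiom in isolation, as a hedged generic sketch (remap, src, dst are hypothetical, not Grid code):
std::vector<int> table(nsite);
for(int i=0;i<nsite;i++) table[i] = remap(i); // build the index map once, off the hot path
for(int i=0;i<nsite;i++) dst[table[i]] = src[i]; // hot loop is pure loads and stores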
@ -825,6 +892,8 @@ void ExtractSlice(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slic
}
//Insert subvolume orthogonal to direction 'orthog' with slice index 'slice_lo' from 'lowDim' onto slice index 'slice_hi' of higherDim
//The local dimensions of both 'lowDim' and 'higherDim' orthogonal to 'orthog' should be the same
template<class vobj>
void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{
@ -841,11 +910,70 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
for(int d=0;d<nh;d++){
if ( d!=orthog ) {
assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
}
assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
}
}
#if 1
size_t nsite = lg->lSites()/lg->LocalDimensions()[orthog];
size_t tbytes = 4*nsite*sizeof(int);
int *table = (int*)malloc(tbytes);
thread_for(idx,nsite,{
Coordinate lcoor(nl);
Coordinate hcoor(nh);
lcoor[orthog] = slice_lo;
hcoor[orthog] = slice_hi;
size_t rem = idx;
for(int mu=0;mu<nl;mu++){
if(mu != orthog){
int xmu = rem % lg->LocalDimensions()[mu]; rem /= lg->LocalDimensions()[mu];
lcoor[mu] = hcoor[mu] = xmu;
}
}
int loidx = lg->oIndex(lcoor);
int liidx = lg->iIndex(lcoor);
int hoidx = hg->oIndex(hcoor);
int hiidx = hg->iIndex(hcoor);
int* tt = table + 4*idx;
tt[0] = loidx;
tt[1] = liidx;
tt[2] = hoidx;
tt[3] = hiidx;
});
int* table_d = (int*)acceleratorAllocDevice(tbytes);
acceleratorCopyToDevice(table,table_d,tbytes);
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
autoView(lowDim_v,lowDim,AcceleratorRead);
autoView(higherDim_v,higherDim,AcceleratorWrite);
accelerator_for(idx,nsite,1,{
static const int words=sizeof(vobj)/sizeof(vector_type);
int* tt = table_d + 4*idx;
int from_oidx = *tt++;
int from_lane = *tt++;
int to_oidx = *tt++;
int to_lane = *tt;
const vector_type* from = (const vector_type *)&lowDim_v[from_oidx];
vector_type* to = (vector_type *)&higherDim_v[to_oidx];
scalar_type stmp;
for(int w=0;w<words;w++){
stmp = getlane(from[w], from_lane);
putlane(to[w], stmp, to_lane);
}
});
acceleratorFreeDevice(table_d);
free(table);
#else
// the above should guarantee that the operations are local
autoView(lowDimv,lowDim,CpuRead);
autoView(higherDimv,higherDim,CpuWrite);
@ -861,6 +989,7 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
pokeLocalSite(s,higherDimv,hcoor);
}
});
#endif
}
@ -925,7 +1054,7 @@ void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine)
Coordinate fcoor(nd);
Coordinate ccoor(nd);
for(int g=0;g<fg->gSites();g++){
for(int64_t g=0;g<fg->gSites();g++){
fg->GlobalIndexToGlobalCoor(g,fcoor);
for(int d=0;d<nd;d++){

Grid/lattice/PaddedCell.h (new file, 565 lines)

@ -0,0 +1,565 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/lattice/PaddedCell.h
Copyright (C) 2019
Author: Peter Boyle pboyle@bnl.gov
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include<Grid/cshift/Cshift.h>
NAMESPACE_BEGIN(Grid);
//Allow the user to specify how the C-shift is performed, e.g. to respect the appropriate boundary conditions
template<typename vobj>
struct CshiftImplBase{
virtual Lattice<vobj> Cshift(const Lattice<vobj> &in, int dir, int shift) const = 0;
virtual ~CshiftImplBase(){}
};
template<typename vobj>
struct CshiftImplDefault: public CshiftImplBase<vobj>{
Lattice<vobj> Cshift(const Lattice<vobj> &in, int dir, int shift) const override{ return Grid::Cshift(in,dir,shift); }
};
template<typename Gimpl>
struct CshiftImplGauge: public CshiftImplBase<typename Gimpl::GaugeLinkField::vector_object>{
typename Gimpl::GaugeLinkField Cshift(const typename Gimpl::GaugeLinkField &in, int dir, int shift) const override{ return Gimpl::CshiftLink(in,dir,shift); }
};
/*
*
* TODO:
* -- address elements of vobj via thread block in Scatter/Gather
* -- overlap comms with motion in Face_exchange
*
*/
template<class vobj> inline void ScatterSlice(const cshiftVector<vobj> &buf,
Lattice<vobj> &lat,
int x,
int dim,
int offset=0)
{
const int Nsimd=vobj::Nsimd();
typedef typename vobj::scalar_object sobj;
GridBase *grid = lat.Grid();
Coordinate simd = grid->_simd_layout;
int Nd = grid->Nd();
int block = grid->_slice_block[dim];
int stride = grid->_slice_stride[dim];
int nblock = grid->_slice_nblock[dim];
int rd = grid->_rdimensions[dim];
int ox = x%rd;
int ix = x/rd;
int isites = 1; for(int d=0;d<Nd;d++) if( d!=dim) isites*=simd[d];
Coordinate rsimd= simd; rsimd[dim]=1; // maybe reduce Nsimd
int rNsimd = 1; for(int d=0;d<Nd;d++) rNsimd*=rsimd[d];
int rNsimda= Nsimd/simd[dim]; // should be equal
assert(rNsimda==rNsimd);
int face_ovol=block*nblock;
// assert(buf.size()==face_ovol*rNsimd);
/*This will work GPU ONLY unless rNsimd is put in the lexico index*/
//Let's make it work on GPU and then make a special accelerator_for that
//doesn't hide the SIMD direction and keeps explicit in the threadIdx
//for cross platform
// FIXME -- can put internal indices into thread loop
auto buf_p = & buf[0];
autoView(lat_v, lat, AcceleratorWrite);
accelerator_for(ss, face_ovol/simd[dim],Nsimd,{
// scalar layout won't coalesce
int blane=acceleratorSIMTlane(Nsimd); // buffer lane
int olane=blane%rNsimd; // reduced lattice lane
int obit =blane/rNsimd;
///////////////////////////////////////////////////////////////
// osite -- potentially one bit from simd in the buffer: (ss<<1)|obit
///////////////////////////////////////////////////////////////
int ssp = ss*simd[dim]+obit;
int b = ssp%block;
int n = ssp/block;
int osite= b+n*stride + ox*block;
////////////////////////////////////////////
// isite -- map lane within buffer to lane within lattice
////////////////////////////////////////////
Coordinate icoor;
int lane;
Lexicographic::CoorFromIndex(icoor,olane,rsimd);
icoor[dim]=ix;
Lexicographic::IndexFromCoor(icoor,lane,simd);
///////////////////////////////////////////
// Transfer into lattice - will coalesce
///////////////////////////////////////////
sobj obj = extractLane(blane,buf_p[ss+offset]);
insertLane(lane,lat_v[osite],obj);
});
}
template<class vobj> inline void GatherSlice(cshiftVector<vobj> &buf,
const Lattice<vobj> &lat,
int x,
int dim,
int offset=0)
{
const int Nsimd=vobj::Nsimd();
typedef typename vobj::scalar_object sobj;
autoView(lat_v, lat, AcceleratorRead);
GridBase *grid = lat.Grid();
Coordinate simd = grid->_simd_layout;
int Nd = grid->Nd();
int block = grid->_slice_block[dim];
int stride = grid->_slice_stride[dim];
int nblock = grid->_slice_nblock[dim];
int rd = grid->_rdimensions[dim];
int ox = x%rd;
int ix = x/rd;
int isites = 1; for(int d=0;d<Nd;d++) if( d!=dim) isites*=simd[d];
Coordinate rsimd= simd; rsimd[dim]=1; // maybe reduce Nsimd
int rNsimd = 1; for(int d=0;d<Nd;d++) rNsimd*=rsimd[d];
int face_ovol=block*nblock;
// assert(buf.size()==face_ovol*rNsimd);
/*This will work GPU ONLY unless rNsimd is put in the lexico index*/
//Let's make it work on GPU and then make a special accelerator_for that
//doesn't hide the SIMD direction and keeps explicit in the threadIdx
//for cross platform
//For CPU perhaps just run a loop over Nsimd
auto buf_p = & buf[0];
accelerator_for(ss, face_ovol/simd[dim],Nsimd,{
// scalar layout won't coalesce
int blane=acceleratorSIMTlane(Nsimd); // buffer lane
int olane=blane%rNsimd; // reduced lattice lane
int obit =blane/rNsimd;
////////////////////////////////////////////
// osite
////////////////////////////////////////////
int ssp = ss*simd[dim]+obit;
int b = ssp%block;
int n = ssp/block;
int osite= b+n*stride + ox*block;
////////////////////////////////////////////
// isite -- map lane within buffer to lane within lattice
////////////////////////////////////////////
Coordinate icoor;
int lane;
Lexicographic::CoorFromIndex(icoor,olane,rsimd);
icoor[dim]=ix;
Lexicographic::IndexFromCoor(icoor,lane,simd);
///////////////////////////////////////////
// Take out of lattice
///////////////////////////////////////////
sobj obj = extractLane(lane,lat_v[osite]);
insertLane(blane,buf_p[ss+offset],obj);
});
/*
int words =block*nblock/simd[dim];
std::vector<vobj> tbuf(words);
acceleratorCopyFromDevice((void *)&buf[offset],(void *)&tbuf[0],words*sizeof(vobj));
typedef typename vobj::scalar_type scalar;
scalar *sbuf = (scalar *)&tbuf[0];
scalar tmp=0.0;
for(int w=0;w<words*sizeof(vobj)/sizeof(scalar);w++){
tmp=tmp+conjugate(sbuf[w])*sbuf[w];
}
std::cout << " Gathered buffer norm "<<tmp<<std::endl;
*/
}
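A worked micro-example of the lane bookkeeping shared by ScatterSlice and GatherSlice: with simd = {1,2,2,2} and dim = 3, the reduced layout rsimd = {1,2,2,1} gives rNsimd = 4 against Nsimd = 8, so a buffer lane blane in [0,8) decomposes as olane = blane % 4 (the lane within the reduced layout) and obit = blane / 4 in {0,1}; the spare bit re-enters the site index through ssp = ss*simd[dim] + obit.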
class PaddedCell {
public:
GridCartesian * unpadded_grid;
int dims;
int depth;
std::vector<GridCartesian *> grids;
~PaddedCell()
{
DeleteGrids();
}
PaddedCell(int _depth,GridCartesian *_grid)
{
unpadded_grid = _grid;
depth=_depth;
dims=_grid->Nd();
AllocateGrids();
Coordinate local =unpadded_grid->LocalDimensions();
Coordinate procs =unpadded_grid->ProcessorGrid();
for(int d=0;d<dims;d++){
if ( procs[d] > 1 ) assert(local[d]>=depth);
}
}
void DeleteGrids(void)
{
for(int d=0;d<grids.size();d++){
delete grids[d];
}
grids.resize(0);
};
void AllocateGrids(void)
{
Coordinate local =unpadded_grid->LocalDimensions();
Coordinate simd =unpadded_grid->_simd_layout;
Coordinate processors=unpadded_grid->_processors;
Coordinate plocal =unpadded_grid->LocalDimensions();
Coordinate global(dims);
GridCartesian *old_grid = unpadded_grid;
// expand up one dim at a time
for(int d=0;d<dims;d++){
if ( processors[d] > 1 ) {
plocal[d] += 2*depth;
for(int e=0;e<dims;e++){
global[e] = plocal[e]*processors[e];
}
old_grid = new GridCartesian(global,simd,processors);
}
grids.push_back(old_grid);
}
};
template<class vobj>
inline Lattice<vobj> Extract(const Lattice<vobj> &in) const
{
Coordinate processors=unpadded_grid->_processors;
Lattice<vobj> out(unpadded_grid);
Coordinate local =unpadded_grid->LocalDimensions();
// depends on the MPI spread
Coordinate fll(dims,depth);
Coordinate tll(dims,0); // depends on the MPI spread
for(int d=0;d<dims;d++){
if( processors[d]==1 ) fll[d]=0;
}
localCopyRegion(in,out,fll,tll,local);
return out;
}
template<class vobj>
inline Lattice<vobj> Exchange(const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
{
GridBase *old_grid = in.Grid();
int dims = old_grid->Nd();
Lattice<vobj> tmp = in;
for(int d=0;d<dims;d++){
tmp = Expand(d,tmp,cshift); // rvalue && assignment
}
return tmp;
}
template<class vobj>
inline Lattice<vobj> ExchangePeriodic(const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
{
GridBase *old_grid = in.Grid();
int dims = old_grid->Nd();
Lattice<vobj> tmp = in;
for(int d=0;d<dims;d++){
tmp = ExpandPeriodic(d,tmp,cshift); // rvalue && assignment
}
return tmp;
}
// expand up one dim at a time
template<class vobj>
inline Lattice<vobj> Expand(int dim, const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
{
Coordinate processors=unpadded_grid->_processors;
GridBase *old_grid = in.Grid();
GridCartesian *new_grid = grids[dim];//These are new grids
Lattice<vobj> padded(new_grid);
Lattice<vobj> shifted(old_grid);
Coordinate local =old_grid->LocalDimensions();
Coordinate plocal =new_grid->LocalDimensions();
if(dim==0) conformable(old_grid,unpadded_grid);
else conformable(old_grid,grids[dim-1]);
// std::cout << " dim "<<dim<<" local "<<local << " padding to "<<plocal<<std::endl;
double tins=0, tshift=0;
int islocal = 0 ;
if ( processors[dim] == 1 ) islocal = 1;
if ( islocal ) {
// replace with a copy and maybe grid swizzle
double t = usecond();
padded = in;
tins += usecond() - t;
} else {
//////////////////////////////////////////////
// Replace sequence with
// ---------------------
// (i) Gather high face(s); start comms
// (ii) Gather low face(s); start comms
// (iii) Copy middle bit with localCopyRegion
// (iv) Complete high face(s), insert slice(s)
// (iv) Complete low face(s), insert slice(s)
//////////////////////////////////////////////
// Middle bit
double t = usecond();
for(int x=0;x<local[dim];x++){
InsertSliceLocal(in,padded,x,depth+x,dim);
}
tins += usecond() - t;
// High bit
t = usecond();
shifted = cshift.Cshift(in,dim,depth);
tshift += usecond() - t;
t=usecond();
for(int x=0;x<depth;x++){
InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim);
}
tins += usecond() - t;
// Low bit
t = usecond();
shifted = cshift.Cshift(in,dim,-depth);
tshift += usecond() - t;
t = usecond();
for(int x=0;x<depth;x++){
InsertSliceLocal(shifted,padded,x,x,dim);
}
tins += usecond() - t;
}
std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl;
return padded;
}
template<class vobj>
inline Lattice<vobj> ExpandPeriodic(int dim, const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
{
Coordinate processors=unpadded_grid->_processors;
GridBase *old_grid = in.Grid();
GridCartesian *new_grid = grids[dim];//These are new grids
Lattice<vobj> padded(new_grid);
Lattice<vobj> shifted(old_grid);
Coordinate local =old_grid->LocalDimensions();
Coordinate plocal =new_grid->LocalDimensions();
if(dim==0) conformable(old_grid,unpadded_grid);
else conformable(old_grid,grids[dim-1]);
// std::cout << " dim "<<dim<<" local "<<local << " padding to "<<plocal<<std::endl;
double tins=0, tshift=0;
int islocal = 0 ;
if ( processors[dim] == 1 ) islocal = 1;
if ( islocal ) {
// replace with a copy and maybe grid swizzle
double t = usecond();
padded = in;
tins += usecond() - t;
} else {
//////////////////////////////////////////////
// Replace sequence with
// ---------------------
// (i) Gather high face(s); start comms
// (ii) Gather low face(s); start comms
// (iii) Copy middle bit with localCopyRegion
// (iv) Complete high face(s), insert slice(s)
// (iv) Complete low face(s), insert slice(s)
//////////////////////////////////////////////
Face_exchange(in,padded,dim,depth);
}
return padded;
}
template<class vobj>
void Face_exchange(const Lattice<vobj> &from,
Lattice<vobj> &to,
int dimension,int depth) const
{
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::scalar_object sobj;
RealD t_gather=0.0;
RealD t_scatter=0.0;
RealD t_comms=0.0;
RealD t_copy=0.0;
// std::cout << GridLogMessage << "dimension " <<dimension<<std::endl;
// DumpSliceNorm(std::string("Face_exchange from"),from,dimension);
GridBase *grid=from.Grid();
GridBase *new_grid=to.Grid();
Coordinate lds = from.Grid()->_ldimensions;
Coordinate nlds= to.Grid()->_ldimensions;
Coordinate simd= from.Grid()->_simd_layout;
int ld = lds[dimension];
int nld = to.Grid()->_ldimensions[dimension];
const int Nsimd = vobj::Nsimd();
assert(depth<=lds[dimension]); // halo must come entirely from the immediate neighbouring node
assert(depth>0); // A caller bug if zero
assert(ld+2*depth==nld);
////////////////////////////////////////////////////////////////////////////
// Face size and byte calculations
////////////////////////////////////////////////////////////////////////////
int buffer_size = 1;
for(int d=0;d<lds.size();d++){
if ( d!= dimension) buffer_size=buffer_size*lds[d];
}
buffer_size = buffer_size / Nsimd;
int rNsimd = Nsimd / simd[dimension];
assert( buffer_size == from.Grid()->_slice_nblock[dimension]*from.Grid()->_slice_block[dimension] / simd[dimension]);
static cshiftVector<vobj> send_buf;
static cshiftVector<vobj> recv_buf;
send_buf.resize(buffer_size*2*depth);
recv_buf.resize(buffer_size*2*depth);
std::vector<CommsRequest_t> fwd_req;
std::vector<CommsRequest_t> bwd_req;
int words = buffer_size;
int bytes = words * sizeof(vobj);
////////////////////////////////////////////////////////////////////////////
// Communication coords
////////////////////////////////////////////////////////////////////////////
int comm_proc = 1;
int xmit_to_rank;
int recv_from_rank;
grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
////////////////////////////////////////////////////////////////////////////
// Gather all surface terms up to depth "d"
////////////////////////////////////////////////////////////////////////////
RealD t;
int plane=0;
for ( int d=0;d < depth ; d ++ ) {
int tag = d*1024 + dimension*2+0;
t=usecond();
GatherSlice(send_buf,from,d,dimension,plane*buffer_size); plane++;
t_gather+=usecond()-t;
t=usecond();
grid->SendToRecvFromBegin(fwd_req,
(void *)&send_buf[d*buffer_size], xmit_to_rank,
(void *)&recv_buf[d*buffer_size], recv_from_rank, bytes, tag);
t_comms+=usecond()-t;
}
for ( int d=0;d < depth ; d ++ ) {
int tag = d*1024 + dimension*2+1;
t=usecond();
GatherSlice(send_buf,from,ld-depth+d,dimension,plane*buffer_size); plane++;
t_gather+= usecond() - t;
t=usecond();
grid->SendToRecvFromBegin(bwd_req,
(void *)&send_buf[(d+depth)*buffer_size], recv_from_rank,
(void *)&recv_buf[(d+depth)*buffer_size], xmit_to_rank, bytes,tag);
t_comms+=usecond()-t;
}
////////////////////////////////////////////////////////////////////////////
// Copy interior -- overlap this with comms
////////////////////////////////////////////////////////////////////////////
int Nd = new_grid->Nd();
Coordinate LL(Nd,0);
Coordinate sz = grid->_ldimensions;
Coordinate toLL(Nd,0);
toLL[dimension]=depth;
t=usecond();
localCopyRegion(from,to,LL,toLL,sz);
t_copy= usecond() - t;
////////////////////////////////////////////////////////////////////////////
// Scatter all faces
////////////////////////////////////////////////////////////////////////////
// DumpSliceNorm(std::string("Face_exchange to before scatter"),to,dimension);
plane=0;
t=usecond();
grid->CommsComplete(fwd_req);
t_comms+= usecond() - t;
t=usecond();
for ( int d=0;d < depth ; d ++ ) {
ScatterSlice(recv_buf,to,nld-depth+d,dimension,plane*buffer_size); plane++;
}
t_scatter= usecond() - t;
t=usecond();
grid->CommsComplete(bwd_req);
t_comms+= usecond() - t;
t=usecond();
for ( int d=0;d < depth ; d ++ ) {
ScatterSlice(recv_buf,to,d,dimension,plane*buffer_size); plane++;
}
t_scatter+= usecond() - t;
// DumpSliceNorm(std::string("Face_exchange to scatter 1st "),to,dimension);
//DumpSliceNorm(std::string("Face_exchange to done"),to,dimension);
std::cout << GridLogPerformance << "PaddedCell::Expand new timings: gather :" << t_gather/1000 << "ms"<<std::endl;
// std::cout << GridLogPerformance << "PaddedCell::Expand new timings: gather :" << 2.0*bytes/t_gather << "MB/s"<<std::endl;
std::cout << GridLogPerformance << "PaddedCell::Expand new timings: scatter:" << t_scatter/1000 << "ms"<<std::endl;
// std::cout << GridLogPerformance << "PaddedCell::Expand new timings: scatter:" << 2.0*bytes/t_scatter<< "MB/s"<<std::endl;
std::cout << GridLogPerformance << "PaddedCell::Expand new timings: copy :" << t_copy/1000 << "ms"<<std::endl;
std::cout << GridLogPerformance << "PaddedCell::Expand new timings: comms :" << t_comms/1000 << "ms"<<std::endl;
// std::cout << GridLogPerformance << "PaddedCell::Expand new timings: comms :" << (RealD)4.0*bytes/t_comms << "MB/s"<<std::endl;
}
};
NAMESPACE_END(Grid);
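A hedged usage sketch for the class above (grid construction and field names are assumptions):
GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(latt, simd_layout, mpi_layout);
LatticeGaugeField U(UGrid);
PaddedCell cell(2, UGrid); // depth-2 halo in every partitioned dimension
auto Upad = cell.ExchangePeriodic(U); // lives on the enlarged grid, halos filled
auto Ucheck = cell.Extract(Upad); // back on UGrid; should reproduce U
For links with non-trivial boundary conditions, exchange each GaugeLinkField separately and pass CshiftImplGauge<Gimpl>() as the second argument of Exchange so the shifts go through Gimpl::CshiftLink.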


@ -165,7 +165,7 @@ class BinaryIO {
* FIXME -- 128^3 x 256 x 16 will overflow.
*/
int global_site;
int64_t global_site;
Lexicographic::CoorFromIndex(coor,local_site,local_vol);
@ -175,8 +175,8 @@ class BinaryIO {
Lexicographic::IndexFromCoor(coor,global_site,global_vol);
uint32_t gsite29 = global_site%29;
uint32_t gsite31 = global_site%31;
uint64_t gsite29 = global_site%29;
uint64_t gsite31 = global_site%31;
site_crc = crc32(0,(unsigned char *)site_buf,sizeof(fobj));
// std::cout << "Site "<<local_site << " crc "<<std::hex<<site_crc<<std::dec<<std::endl;
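Checking the FIXME above: 128^3 x 256 x 16 = 2,097,152 x 256 x 16 = 8,589,934,592 global sites, well past the 32-bit signed limit of 2,147,483,647, so the lexicographic index and the mod-29/mod-31 site checksums both need the 64-bit types introduced here.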
@ -545,7 +545,9 @@ class BinaryIO {
const std::string &format,
uint32_t &nersc_csum,
uint32_t &scidac_csuma,
uint32_t &scidac_csumb)
uint32_t &scidac_csumb,
int control=BINARYIO_LEXICOGRAPHIC
)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::Realified::scalar_type word; word w=0;
@ -556,7 +558,7 @@ class BinaryIO {
std::vector<sobj> scalardata(lsites);
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|control,
nersc_csum,scidac_csuma,scidac_csumb);
GridStopWatch timer;
@ -582,7 +584,8 @@ class BinaryIO {
const std::string &format,
uint32_t &nersc_csum,
uint32_t &scidac_csuma,
uint32_t &scidac_csumb)
uint32_t &scidac_csumb,
int control=BINARYIO_LEXICOGRAPHIC)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::Realified::scalar_type word; word w=0;
@ -607,7 +610,7 @@ class BinaryIO {
while (attemptsLeft >= 0)
{
grid->Barrier();
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|control,
nersc_csum,scidac_csuma,scidac_csumb);
if (checkWrite)
{
@ -617,7 +620,7 @@ class BinaryIO {
std::cout << GridLogMessage << "writeLatticeObject: read back object" << std::endl;
grid->Barrier();
IOobject(w,grid,ckiodata,file,ckoffset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
IOobject(w,grid,ckiodata,file,ckoffset,format,BINARYIO_READ|control,
cknersc_csum,ckscidac_csuma,ckscidac_csumb);
if ((cknersc_csum != nersc_csum) or (ckscidac_csuma != scidac_csuma) or (ckscidac_csumb != scidac_csumb))
{


@ -206,7 +206,7 @@ class GridLimeReader : public BinaryIO {
// Read a generic lattice field and verify checksum
////////////////////////////////////////////
template<class vobj>
void readLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
void readLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name,int control=BINARYIO_LEXICOGRAPHIC)
{
typedef typename vobj::scalar_object sobj;
scidacChecksum scidacChecksum_;
@ -238,7 +238,7 @@ class GridLimeReader : public BinaryIO {
uint64_t offset= ftello(File);
// std::cout << " ReadLatticeObject from offset "<<offset << std::endl;
BinarySimpleMunger<sobj,sobj> munge;
BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb,control);
std::cout << GridLogMessage << "SciDAC checksum A " << std::hex << scidac_csuma << std::dec << std::endl;
std::cout << GridLogMessage << "SciDAC checksum B " << std::hex << scidac_csumb << std::dec << std::endl;
/////////////////////////////////////////////
@ -408,7 +408,7 @@ class GridLimeWriter : public BinaryIO
// in communicator used by the field.Grid()
////////////////////////////////////////////////////
template<class vobj>
void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name,int control=BINARYIO_LEXICOGRAPHIC)
{
////////////////////////////////////////////////////////////////////
// NB: FILE and iostream are jointly writing disjoint sequences in the
@ -459,7 +459,7 @@ class GridLimeWriter : public BinaryIO
///////////////////////////////////////////
std::string format = getFormatString<vobj>();
BinarySimpleMunger<sobj,sobj> munge;
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb);
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb,control);
///////////////////////////////////////////
// Wind forward and close the record
@ -512,7 +512,8 @@ class ScidacWriter : public GridLimeWriter {
////////////////////////////////////////////////
template <class vobj, class userRecord>
void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord,
const unsigned int recordScientificPrec = 0)
const unsigned int recordScientificPrec = 0,
int control=BINARYIO_LEXICOGRAPHIC)
{
GridBase * grid = field.Grid();
@ -534,7 +535,7 @@ class ScidacWriter : public GridLimeWriter {
writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
}
// Collective call
writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA),control); // Closes message with checksum
}
};
@ -553,7 +554,8 @@ class ScidacReader : public GridLimeReader {
// Write generic lattice field in scidac format
////////////////////////////////////////////////
template <class vobj, class userRecord>
void readScidacFieldRecord(Lattice<vobj> &field,userRecord &_userRecord)
void readScidacFieldRecord(Lattice<vobj> &field,userRecord &_userRecord,
int control=BINARYIO_LEXICOGRAPHIC)
{
typedef typename vobj::scalar_object sobj;
GridBase * grid = field.Grid();
@ -571,7 +573,7 @@ class ScidacReader : public GridLimeReader {
readLimeObject(header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
readLimeObject(_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
readLimeObject(_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
readLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA));
readLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA),control);
}
void skipPastBinaryRecord(void) {
std::string rec_name(ILDG_BINARY_DATA);
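With control threaded through each layer, the site ordering becomes selectable per record. A hedged write-side sketch (only BINARYIO_LEXICOGRAPHIC is visible in this diff; any non-lexicographic flag value is an assumption):
ScidacWriter WR(grid->IsBoss());
WR.open(filename);
WR.writeScidacFieldRecord(field, record, 0, BINARYIO_LEXICOGRAPHIC); // default ordering
WR.close();
The read side mirrors it via readScidacFieldRecord(field, record, control).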


@ -104,6 +104,7 @@ template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iSca
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
template<typename vtype> using iLorentzComplex = iVector<iScalar<iScalar<vtype> >, Nd > ;
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
@ -178,6 +179,15 @@ typedef iLorentzColourMatrix<vComplexF> vLorentzColourMatrixF;
typedef iLorentzColourMatrix<vComplexD> vLorentzColourMatrixD;
typedef iLorentzColourMatrix<vComplexD2> vLorentzColourMatrixD2;
// LorentzComplex
typedef iLorentzComplex<Complex > LorentzComplex;
typedef iLorentzComplex<ComplexF > LorentzComplexF;
typedef iLorentzComplex<ComplexD > LorentzComplexD;
typedef iLorentzComplex<vComplex > vLorentzComplex;
typedef iLorentzComplex<vComplexF> vLorentzComplexF;
typedef iLorentzComplex<vComplexD> vLorentzComplexD;
// DoubleStored gauge field
typedef iDoubleStoredColourMatrix<Complex > DoubleStoredColourMatrix;
typedef iDoubleStoredColourMatrix<ComplexF > DoubleStoredColourMatrixF;
@ -307,6 +317,10 @@ typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;
typedef Lattice<vLorentzColourMatrixD> LatticeLorentzColourMatrixD;
typedef Lattice<vLorentzColourMatrixD2> LatticeLorentzColourMatrixD2;
typedef Lattice<vLorentzComplex> LatticeLorentzComplex;
typedef Lattice<vLorentzComplexF> LatticeLorentzComplexF;
typedef Lattice<vLorentzComplexD> LatticeLorentzComplexD;
// DoubleStored gauge field
typedef Lattice<vDoubleStoredColourMatrix> LatticeDoubleStoredColourMatrix;
typedef Lattice<vDoubleStoredColourMatrixF> LatticeDoubleStoredColourMatrixF;


@ -34,10 +34,24 @@ directory
NAMESPACE_BEGIN(Grid);
///////////////////////////////////
// Smart configuration base class
///////////////////////////////////
template< class Field >
class ConfigurationBase
{
public:
ConfigurationBase() {}
virtual ~ConfigurationBase() {}
virtual void set_Field(Field& U) =0;
virtual void smeared_force(Field&) = 0;
virtual Field& get_SmearedU() =0;
virtual Field &get_U(bool smeared = false) = 0;
};
template <class GaugeField >
class Action
{
public:
bool is_smeared = false;
RealD deriv_norm_sum;
@ -77,11 +91,39 @@ public:
void refresh_timer_stop(void) { refresh_us+=usecond(); }
void S_timer_start(void) { S_us-=usecond(); }
void S_timer_stop(void) { S_us+=usecond(); }
/////////////////////////////
// Heatbath?
/////////////////////////////
virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) = 0; // refresh pseudofermions
virtual RealD S(const GaugeField& U) = 0; // evaluate the action
virtual RealD Sinitial(const GaugeField& U) { return this->S(U); } ; // if the refresh computes the action, can cache it. Alternately refreshAndAction() ?
virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0; // evaluate the action derivative
/////////////////////////////////////////////////////////////
// virtual smeared interface through configuration container
/////////////////////////////////////////////////////////////
virtual void refresh(ConfigurationBase<GaugeField> & U, GridSerialRNG &sRNG, GridParallelRNG& pRNG)
{
refresh(U.get_U(is_smeared),sRNG,pRNG);
}
virtual RealD S(ConfigurationBase<GaugeField>& U)
{
return S(U.get_U(is_smeared));
}
virtual RealD Sinitial(ConfigurationBase<GaugeField>& U)
{
return Sinitial(U.get_U(is_smeared));
}
virtual void deriv(ConfigurationBase<GaugeField>& U, GaugeField& dSdU)
{
deriv(U.get_U(is_smeared),dSdU);
if ( is_smeared ) {
U.smeared_force(dSdU);
}
}
///////////////////////////////
// Logging
///////////////////////////////
virtual std::string action_name() = 0; // return the action name
virtual std::string LogParameters() = 0; // prints action parameters
virtual ~Action(){}
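The upshot of the block above: concrete actions keep overriding only the Field-based virtuals, while the new ConfigurationBase overloads route every call through get_U(is_smeared) and, for deriv, pull the force back through smeared_force. A hedged call-path sketch (MyAction and SmartConfig are placeholders):
MyAction act; // overrides refresh/S/deriv on the plain GaugeField only
act.is_smeared = true;
RealD H = act.S(SmartConfig); // forwards to act.S(SmartConfig.get_U(true))
act.deriv(SmartConfig, dSdU); // deriv on smeared U, then SmartConfig.smeared_force(dSdU)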


@ -30,6 +30,8 @@ directory
#ifndef QCD_ACTION_CORE
#define QCD_ACTION_CORE
#include <Grid/qcd/action/gauge/GaugeImplementations.h>
#include <Grid/qcd/action/ActionBase.h>
NAMESPACE_CHECK(ActionBase);
#include <Grid/qcd/action/ActionSet.h>


@ -126,6 +126,16 @@ typedef WilsonFermion<WilsonTwoIndexSymmetricImplD> WilsonTwoIndexSymmetricFermi
typedef WilsonFermion<WilsonTwoIndexAntiSymmetricImplF> WilsonTwoIndexAntiSymmetricFermionF;
typedef WilsonFermion<WilsonTwoIndexAntiSymmetricImplD> WilsonTwoIndexAntiSymmetricFermionD;
// Sp(2n)
typedef WilsonFermion<SpWilsonImplF> SpWilsonFermionF;
typedef WilsonFermion<SpWilsonImplD> SpWilsonFermionD;
typedef WilsonFermion<SpWilsonTwoIndexAntiSymmetricImplF> SpWilsonTwoIndexAntiSymmetricFermionF;
typedef WilsonFermion<SpWilsonTwoIndexAntiSymmetricImplD> SpWilsonTwoIndexAntiSymmetricFermionD;
typedef WilsonFermion<SpWilsonTwoIndexSymmetricImplF> SpWilsonTwoIndexSymmetricFermionF;
typedef WilsonFermion<SpWilsonTwoIndexSymmetricImplD> SpWilsonTwoIndexSymmetricFermionD;
// Twisted mass fermion
typedef WilsonTMFermion<WilsonImplD2> WilsonTMFermionD2;
typedef WilsonTMFermion<WilsonImplF> WilsonTMFermionF;


@ -507,6 +507,7 @@ public:
}
this->face_table_computed=1;
assert(this->u_comm_offset==this->_unified_buffer_size);
accelerator_barrier();
}
};


@ -261,6 +261,22 @@ typedef WilsonImpl<vComplex, TwoIndexAntiSymmetricRepresentation, CoeffReal > W
typedef WilsonImpl<vComplexF, TwoIndexAntiSymmetricRepresentation, CoeffReal > WilsonTwoIndexAntiSymmetricImplF; // Float
typedef WilsonImpl<vComplexD, TwoIndexAntiSymmetricRepresentation, CoeffReal > WilsonTwoIndexAntiSymmetricImplD; // Double
//sp 2n
typedef WilsonImpl<vComplex, SpFundamentalRepresentation, CoeffReal > SpWilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, SpFundamentalRepresentation, CoeffReal > SpWilsonImplF; // Float
typedef WilsonImpl<vComplexD, SpFundamentalRepresentation, CoeffReal > SpWilsonImplD; // Double
typedef WilsonImpl<vComplex, SpTwoIndexAntiSymmetricRepresentation, CoeffReal > SpWilsonTwoIndexAntiSymmetricImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, SpTwoIndexAntiSymmetricRepresentation, CoeffReal > SpWilsonTwoIndexAntiSymmetricImplF; // Float
typedef WilsonImpl<vComplexD, SpTwoIndexAntiSymmetricRepresentation, CoeffReal > SpWilsonTwoIndexAntiSymmetricImplD; // Double
typedef WilsonImpl<vComplex, SpTwoIndexSymmetricRepresentation, CoeffReal > SpWilsonTwoIndexSymmetricImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, SpTwoIndexSymmetricRepresentation, CoeffReal > SpWilsonTwoIndexSymmetricImplF; // Float
typedef WilsonImpl<vComplexD, SpTwoIndexSymmetricRepresentation, CoeffReal > SpWilsonTwoIndexSymmetricImplD; // Double
typedef WilsonImpl<vComplex, SpTwoIndexSymmetricRepresentation, CoeffReal > SpWilsonAdjImplR; // Real.. whichever prec // adj = 2indx symmetric for Sp(2N)
typedef WilsonImpl<vComplexF, SpTwoIndexSymmetricRepresentation, CoeffReal > SpWilsonAdjImplF; // Float // adj = 2indx symmetric for Sp(2N)
typedef WilsonImpl<vComplexD, SpTwoIndexSymmetricRepresentation, CoeffReal > SpWilsonAdjImplD; // Double // adj = 2indx symmetric for Sp(2N)
NAMESPACE_END(Grid);


@ -332,8 +332,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
/////////////////////////////
{
GRID_TRACE("Gather");
st.HaloExchangeOptGather(in,compressor);
accelerator_barrier();
st.HaloExchangeOptGather(in,compressor); // Put the barrier in the routine
}
std::vector<std::vector<CommsRequest_t> > requests;


@ -423,14 +423,14 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
#define KERNEL_CALL_EXT(A) \
const uint64_t NN = Nsite*Ls; \
const uint64_t sz = st.surface_list.size(); \
auto ptr = &st.surface_list[0]; \
accelerator_forNB( ss, sz, Simd::Nsimd(), { \
int sF = ptr[ss]; \
int sU = ss/Ls; \
int sU = sF/Ls; \
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
});
}); \
accelerator_barrier();
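The one-character change above is the substantive fix: ss indexes entries of st.surface_list, not lattice sites, so the gauge-field site must be recovered from the looked-up sF as sU = sF/Ls; deriving it from the loop counter is only correct when the surface list happens to enumerate every site. The trailing accelerator_barrier() then makes kernel completion visible before the caller reuses the buffers.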
#define ASM_CALL(A) \
thread_for( sss, Nsite, { \
@ -474,9 +474,10 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); return;}
#endif
} else if( exterior ) {
// dependent on result of merge
acceleratorFenceComputeStream();
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;}
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL_EXT(GenericDhopSiteExt); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteExt); return;}
#ifndef GRID_CUDA
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); return;}
#endif
@ -506,9 +507,10 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;}
#endif
} else if( exterior ) {
// Dependent on result of merge
acceleratorFenceComputeStream();
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagExt); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt); return;}
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL_EXT(GenericDhopSiteDagExt); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteDagExt); return;}
#ifndef GRID_CUDA
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagExt); return;}
#endif


@ -0,0 +1 @@
../WilsonCloverFermionInstantiation.cc.master


@ -0,0 +1 @@
../WilsonFermionInstantiation.cc.master


@ -0,0 +1 @@
../WilsonKernelsInstantiation.cc.master


@ -0,0 +1 @@
../WilsonTMFermionInstantiation.cc.master


@ -0,0 +1 @@
#define IMPLEMENTATION SpWilsonImplD


@ -0,0 +1 @@
../WilsonCloverFermionInstantiation.cc.master


@ -0,0 +1 @@
../WilsonFermionInstantiation.cc.master


@ -0,0 +1 @@
../WilsonKernelsInstantiation.cc.master


@ -0,0 +1 @@
../WilsonTMFermionInstantiation.cc.master


@ -0,0 +1 @@
#define IMPLEMENTATION SpWilsonImplF


@ -0,0 +1 @@
#define IMPLEMENTATION SpWilsonTwoIndexAntiSymmetricImplD


@ -0,0 +1 @@
#define IMPLEMENTATION SpWilsonTwoIndexAntiSymmetricImplF


@ -0,0 +1 @@
#define IMPLEMENTATION SpWilsonTwoIndexSymmetricImplD


@ -0,0 +1 @@
#define IMPLEMENTATION SpWilsonTwoIndexSymmetricImplF


@ -10,12 +10,18 @@ WILSON_IMPL_LIST=" \
WilsonImplF \
WilsonImplD \
WilsonImplD2 \
SpWilsonImplF \
SpWilsonImplD \
WilsonAdjImplF \
WilsonAdjImplD \
WilsonTwoIndexSymmetricImplF \
WilsonTwoIndexSymmetricImplD \
WilsonTwoIndexAntiSymmetricImplF \
WilsonTwoIndexAntiSymmetricImplD \
SpWilsonTwoIndexAntiSymmetricImplF \
SpWilsonTwoIndexAntiSymmetricImplD \
SpWilsonTwoIndexSymmetricImplF \
SpWilsonTwoIndexSymmetricImplD \
GparityWilsonImplF \
GparityWilsonImplD "


@ -39,6 +39,9 @@ NAMESPACE_BEGIN(Grid);
typedef WilsonGaugeAction<PeriodicGimplR> WilsonGaugeActionR;
typedef WilsonGaugeAction<PeriodicGimplF> WilsonGaugeActionF;
typedef WilsonGaugeAction<PeriodicGimplD> WilsonGaugeActionD;
typedef WilsonGaugeAction<SpPeriodicGimplR> SpWilsonGaugeActionR;
typedef WilsonGaugeAction<SpPeriodicGimplF> SpWilsonGaugeActionF;
typedef WilsonGaugeAction<SpPeriodicGimplD> SpWilsonGaugeActionD;
typedef PlaqPlusRectangleAction<PeriodicGimplR> PlaqPlusRectangleActionR;
typedef PlaqPlusRectangleAction<PeriodicGimplF> PlaqPlusRectangleActionF;
typedef PlaqPlusRectangleAction<PeriodicGimplD> PlaqPlusRectangleActionD;


@ -61,7 +61,7 @@ NAMESPACE_BEGIN(Grid);
typedef typename Impl::Field Field;
// hardcodes the exponential approximation in the template
template <class S, int Nrepresentation = Nc, int Nexp = 12 > class GaugeImplTypes {
template <class S, int Nrepresentation = Nc, int Nexp = 12, class Group = SU<Nc> > class GaugeImplTypes {
public:
typedef S Simd;
typedef typename Simd::scalar_type scalar_type;
@ -78,8 +78,6 @@ public:
typedef Lattice<SiteLink> LinkField;
typedef Lattice<SiteField> Field;
typedef SU<Nrepresentation> Group;
// Guido: we can probably separate the types from the HMC functions
// this will create 2 kinds of implementations
// probably confusing the users
@ -119,6 +117,7 @@ public:
//
LinkField Pmu(P.Grid());
Pmu = Zero();
for (int mu = 0; mu < Nd; mu++) {
Group::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu);
RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR) ;
@ -127,7 +126,11 @@ public:
}
}
static inline Field projectForce(Field &P) { return Ta(P); }
static inline Field projectForce(Field &P) {
Field ret(P.Grid());
Group::taProj(P, ret);
return ret;
}
static inline void update_field(Field& P, Field& U, double ep){
//static std::chrono::duration<double> diff;
@ -137,7 +140,8 @@ public:
autoView(P_v,P,AcceleratorRead);
accelerator_for(ss, P.Grid()->oSites(),1,{
for (int mu = 0; mu < Nd; mu++) {
U_v[ss](mu) = ProjectOnGroup(Exponentiate(P_v[ss](mu), ep, Nexp) * U_v[ss](mu));
U_v[ss](mu) = Exponentiate(P_v[ss](mu), ep, Nexp) * U_v[ss](mu);
U_v[ss](mu) = Group::ProjectOnGeneralGroup(U_v[ss](mu));
}
});
//auto end = std::chrono::high_resolution_clock::now();
@ -157,7 +161,7 @@ public:
}
static inline void Project(Field &U) {
ProjectSUn(U);
Group::ProjectOnSpecialGroup(U);
}
static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) {
@ -171,6 +175,7 @@ public:
static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) {
Group::ColdConfiguration(pRNG, U);
}
};
@ -178,10 +183,17 @@ typedef GaugeImplTypes<vComplex, Nc> GimplTypesR;
typedef GaugeImplTypes<vComplexF, Nc> GimplTypesF;
typedef GaugeImplTypes<vComplexD, Nc> GimplTypesD;
typedef GaugeImplTypes<vComplex, Nc, 12, Sp<Nc> > SpGimplTypesR;
typedef GaugeImplTypes<vComplexF, Nc, 12, Sp<Nc> > SpGimplTypesF;
typedef GaugeImplTypes<vComplexD, Nc, 12, Sp<Nc> > SpGimplTypesD;
typedef GaugeImplTypes<vComplex, SU<Nc>::AdjointDimension> GimplAdjointTypesR;
typedef GaugeImplTypes<vComplexF, SU<Nc>::AdjointDimension> GimplAdjointTypesF;
typedef GaugeImplTypes<vComplexD, SU<Nc>::AdjointDimension> GimplAdjointTypesD;
NAMESPACE_END(Grid);
#endif // GRID_GAUGE_IMPL_TYPES_H
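The Group template parameter is the hinge for the Sp(2N) support above: projectForce now projects through Group::taProj rather than a hard-coded Ta, link updates reunitarise via Group::ProjectOnGeneralGroup, and Project calls Group::ProjectOnSpecialGroup, so the SpGimplTypes typedefs reuse the entire HMC stack with only the group traits swapped.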


@ -176,7 +176,7 @@ public:
return PeriodicBC::CshiftLink(Link,mu,shift);
}
static inline void setDirections(std::vector<int> &conjDirs) { _conjDirs=conjDirs; }
static inline void setDirections(const std::vector<int> &conjDirs) { _conjDirs=conjDirs; }
static inline std::vector<int> getDirections(void) { return _conjDirs; }
static inline bool isPeriodicGaugeField(void) { return false; }
};
@ -193,6 +193,11 @@ typedef ConjugateGaugeImpl<GimplTypesR> ConjugateGimplR; // Real.. whichever pre
typedef ConjugateGaugeImpl<GimplTypesF> ConjugateGimplF; // Float
typedef ConjugateGaugeImpl<GimplTypesD> ConjugateGimplD; // Double
typedef PeriodicGaugeImpl<SpGimplTypesR> SpPeriodicGimplR; // Real.. whichever prec
typedef PeriodicGaugeImpl<SpGimplTypesF> SpPeriodicGimplF; // Float
typedef PeriodicGaugeImpl<SpGimplTypesD> SpPeriodicGimplD; // Double
NAMESPACE_END(Grid);
#endif


@ -43,7 +43,7 @@ public:
private:
RealD c_plaq;
RealD c_rect;
typename WilsonLoops<Gimpl>::StapleAndRectStapleAllWorkspace workspace;
public:
PlaqPlusRectangleAction(RealD b,RealD c): c_plaq(b),c_rect(c){};
@ -79,27 +79,18 @@ public:
GridBase *grid = Umu.Grid();
std::vector<GaugeLinkField> U (Nd,grid);
std::vector<GaugeLinkField> U2(Nd,grid);
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
WilsonLoops<Gimpl>::RectStapleDouble(U2[mu],U[mu],mu);
}
std::vector<GaugeLinkField> RectStaple(Nd,grid), Staple(Nd,grid);
WilsonLoops<Gimpl>::StapleAndRectStapleAll(Staple, RectStaple, U, workspace);
GaugeLinkField dSdU_mu(grid);
GaugeLinkField staple(grid);
for (int mu=0; mu < Nd; mu++){
// Staple in direction mu
WilsonLoops<Gimpl>::Staple(staple,Umu,mu);
dSdU_mu = Ta(U[mu]*staple)*factor_p;
WilsonLoops<Gimpl>::RectStaple(Umu,staple,U2,U,mu);
dSdU_mu = dSdU_mu + Ta(U[mu]*staple)*factor_r;
dSdU_mu = Ta(U[mu]*Staple[mu])*factor_p;
dSdU_mu = dSdU_mu + Ta(U[mu]*RectStaple[mu])*factor_r;
PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
}
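The rewrite above trades the per-direction Staple/RectStaple calls for a single StapleAndRectStapleAll sweep with a persistent workspace member, so the shared link products are computed once per deriv call instead of once per direction.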


@ -53,9 +53,10 @@ NAMESPACE_BEGIN(Grid);
Integer ReliableUpdateFreq;
protected:
//Action evaluation
//Allow derived classes to override the multishift CG
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, FermionFieldD &out){
#if 0
#if 1
SchurDifferentiableOperator<ImplD> schurOp(numerator ? NumOpD : DenOpD);
ConjugateGradientMultiShift<FermionFieldD> msCG(MaxIter, approx);
msCG(schurOp,in, out);
@ -70,9 +71,10 @@ NAMESPACE_BEGIN(Grid);
msCG(schurOpD, in, out);
#endif
}
//Force evaluation
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, std::vector<FermionFieldD> &out_elems, FermionFieldD &out){
SchurDifferentiableOperator<ImplD> schurOpD(numerator ? NumOpD : DenOpD);
SchurDifferentiableOperator<ImplF> schurOpF (numerator ? NumOpF : DenOpF);
SchurDifferentiableOperator<ImplF> schurOpF(numerator ? NumOpF : DenOpF);
FermionFieldD inD(NumOpD.FermionRedBlackGrid());
FermionFieldD outD(NumOpD.FermionRedBlackGrid());
@ -84,20 +86,15 @@ NAMESPACE_BEGIN(Grid);
virtual void ImportGauge(const typename ImplD::GaugeField &Ud){
typename ImplF::GaugeField Uf(NumOpF.GaugeGrid());
typename ImplD::GaugeField Ud2(NumOpD.GaugeGrid());
precisionChange(Uf, Ud);
precisionChange(Ud2, Ud);
std::cout << "Importing "<<norm2(Ud)<<" "<< norm2(Uf)<<" " << norm2(Ud2)<<std::endl;
std::cout << "Importing "<<norm2(Ud)<<" "<< norm2(Uf)<<" " <<std::endl;
NumOpD.ImportGauge(Ud);
DenOpD.ImportGauge(Ud);
NumOpF.ImportGauge(Uf);
DenOpF.ImportGauge(Uf);
NumOpD.ImportGauge(Ud2);
DenOpD.ImportGauge(Ud2);
}
public:


@ -207,20 +207,27 @@ NAMESPACE_BEGIN(Grid);
//X = (Mdag M)^-1 V^dag phi
//Y = (Mdag)^-1 V^dag phi
Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi
std::cout << GridLogMessage <<" Y "<<norm2(Y)<<std::endl;
X=Zero();
DerivativeSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi
std::cout << GridLogMessage <<" X "<<norm2(X)<<std::endl;
Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi
std::cout << GridLogMessage <<" Y "<<norm2(Y)<<std::endl;
// phi^dag V (Mdag M)^-1 dV^dag phi
Vpc.MpcDagDeriv(force , X, PhiOdd ); dSdU = force;
std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl;
// phi^dag dV (Mdag M)^-1 V^dag phi
Vpc.MpcDeriv(force , PhiOdd, X ); dSdU = dSdU+force;
std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl;
// - phi^dag V (Mdag M)^-1 Mdag dM (Mdag M)^-1 V^dag phi
// - phi^dag V (Mdag M)^-1 dMdag M (Mdag M)^-1 V^dag phi
Mpc.MpcDeriv(force,Y,X); dSdU = dSdU-force;
std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl;
Mpc.MpcDagDeriv(force,X,Y); dSdU = dSdU-force;
std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl;
// FIXME No force contribution from EvenEven assumed here
// Needs a fix for clover.


@ -225,6 +225,18 @@ template <class RepresentationsPolicy,
using GenericHMCRunnerHirep =
HMCWrapperTemplate<PeriodicGimplR, Integrator, RepresentationsPolicy>;
// sp2n
template <template <typename, typename, typename> class Integrator>
using GenericSpHMCRunner = HMCWrapperTemplate<SpPeriodicGimplR, Integrator>;
template <class RepresentationsPolicy,
template <typename, typename, typename> class Integrator>
using GenericSpHMCRunnerHirep =
HMCWrapperTemplate<SpPeriodicGimplR, Integrator, RepresentationsPolicy>;
template <class Implementation, class RepresentationsPolicy,
template <typename, typename, typename> class Integrator>
using GenericHMCRunnerTemplate = HMCWrapperTemplate<Implementation, Integrator, RepresentationsPolicy>;


@ -284,11 +284,12 @@ public:
TheIntegrator.print_timer();
TheIntegrator.Smearer.set_Field(Ucur);
for (int obs = 0; obs < Observables.size(); obs++) {
std::cout << GridLogDebug << "Observables # " << obs << std::endl;
std::cout << GridLogDebug << "Observables total " << Observables.size() << std::endl;
std::cout << GridLogDebug << "Observables pointer " << Observables[obs] << std::endl;
Observables[obs]->TrajectoryComplete(traj + 1, Ucur, sRNG, pRNG);
Observables[obs]->TrajectoryComplete(traj + 1, TheIntegrator.Smearer, sRNG, pRNG);
}
std::cout << GridLogHMC << ":::::::::::::::::::::::::::::::::::::::::::" << std::endl;
}


@ -35,13 +35,16 @@ class CheckpointerParameters : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(CheckpointerParameters,
std::string, config_prefix,
std::string, smeared_prefix,
std::string, rng_prefix,
int, saveInterval,
bool, saveSmeared,
std::string, format, );
CheckpointerParameters(std::string cf = "cfg", std::string rn = "rng",
CheckpointerParameters(std::string cf = "cfg", std::string sf="cfg_smr" , std::string rn = "rng",
int savemodulo = 1, const std::string &f = "IEEE64BIG")
: config_prefix(cf),
smeared_prefix(sf),
rng_prefix(rn),
saveInterval(savemodulo),
format(f){};
@ -61,13 +64,21 @@ template <class Impl>
class BaseHmcCheckpointer : public HmcObservable<typename Impl::Field> {
public:
void build_filenames(int traj, CheckpointerParameters &Params,
std::string &conf_file, std::string &rng_file) {
std::string &conf_file,
std::string &smear_file,
std::string &rng_file) {
{
std::ostringstream os;
os << Params.rng_prefix << "." << traj;
rng_file = os.str();
}
{
std::ostringstream os;
os << Params.smeared_prefix << "." << traj;
smear_file = os.str();
}
{
std::ostringstream os;
os << Params.config_prefix << "." << traj;
@ -84,6 +95,11 @@ public:
}
virtual void initialize(const CheckpointerParameters &Params) = 0;
virtual void TrajectoryComplete(int traj,
typename Impl::Field &U,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG) { assert(0); } ; // HMC should pass the smart config with smeared and unsmeared
virtual void CheckpointRestore(int traj, typename Impl::Field &U,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG) = 0;
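The constructor keeps its defaults, so existing construction sites still compile; a hedged sketch of opting in to smeared snapshots:
CheckpointerParameters P("ckpoint_lat", "ckpoint_smr", "ckpoint_rng", 5);
P.saveSmeared = true; // also write SmartConfig.get_U(true) every 5 trajectories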


@ -61,11 +61,14 @@ public:
fout.close();
}
void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) {
void TrajectoryComplete(int traj,
ConfigurationBase<Field> &SmartConfig,
GridSerialRNG &sRNG, GridParallelRNG &pRNG)
{
if ((traj % Params.saveInterval) == 0) {
std::string config, rng;
this->build_filenames(traj, Params, config, rng);
std::string config, rng, smr;
this->build_filenames(traj, Params, config, smr, rng);
uint32_t nersc_csum;
uint32_t scidac_csuma;
@ -74,9 +77,15 @@ public:
BinarySimpleUnmunger<sobj_double, sobj> munge;
truncate(rng);
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
truncate(config);
std::cout << GridLogMessage << "Written Binary RNG " << rng
<< " checksum " << std::hex
<< nersc_csum <<"/"
<< scidac_csuma <<"/"
<< scidac_csumb
<< std::dec << std::endl;
BinaryIO::writeLatticeObject<vobj, sobj_double>(U, config, munge, 0, Params.format,
truncate(config);
BinaryIO::writeLatticeObject<vobj, sobj_double>(SmartConfig.get_U(false), config, munge, 0, Params.format,
nersc_csum,scidac_csuma,scidac_csumb);
std::cout << GridLogMessage << "Written Binary Configuration " << config
@ -85,6 +94,18 @@ public:
<< scidac_csuma <<"/"
<< scidac_csumb
<< std::dec << std::endl;
if ( Params.saveSmeared ) {
truncate(smr);
BinaryIO::writeLatticeObject<vobj, sobj_double>(SmartConfig.get_U(true), smr, munge, 0, Params.format,
nersc_csum,scidac_csuma,scidac_csumb);
std::cout << GridLogMessage << "Written Binary Smeared Configuration " << smr
<< " checksum " << std::hex
<< nersc_csum <<"/"
<< scidac_csuma <<"/"
<< scidac_csumb
<< std::dec << std::endl;
}
}
};


@ -69,17 +69,27 @@ public:
}
}
void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG,
void TrajectoryComplete(int traj,
ConfigurationBase<GaugeField> &SmartConfig,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG) {
if ((traj % Params.saveInterval) == 0) {
std::string config, rng;
std::string config, rng, smr;
this->build_filenames(traj, Params, config, rng);
GridBase *grid = U.Grid();
GridBase *grid = SmartConfig.get_U(false).Grid();
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
std::cout << GridLogMessage << "Written BINARY RNG " << rng
<< " checksum " << std::hex
<< nersc_csum<<"/"
<< scidac_csuma<<"/"
<< scidac_csumb
<< std::dec << std::endl;
IldgWriter _IldgWriter(grid->IsBoss());
_IldgWriter.open(config);
_IldgWriter.writeConfiguration<GaugeStats>(U, traj, config, config);
_IldgWriter.writeConfiguration<GaugeStats>(SmartConfig.get_U(false), traj, config, config);
_IldgWriter.close();
std::cout << GridLogMessage << "Written ILDG Configuration on " << config
@ -88,6 +98,21 @@ public:
<< scidac_csuma<<"/"
<< scidac_csumb
<< std::dec << std::endl;
if ( Params.saveSmeared ) {
IldgWriter _IldgWriter(grid->IsBoss());
_IldgWriter.open(smr);
_IldgWriter.writeConfiguration<GaugeStats>(SmartConfig.get_U(true), traj, config, config);
_IldgWriter.close();
std::cout << GridLogMessage << "Written ILDG Configuration on " << smr
<< " checksum " << std::hex
<< nersc_csum<<"/"
<< scidac_csuma<<"/"
<< scidac_csumb
<< std::dec << std::endl;
}
}
};


@ -52,23 +52,29 @@ public:
Params.format = "IEEE64BIG"; // fixed, overwrite any other choice
}
void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG,
GridParallelRNG &pRNG) {
virtual void TrajectoryComplete(int traj,
ConfigurationBase<GaugeField> &SmartConfig,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG)
{
if ((traj % Params.saveInterval) == 0) {
std::string config, rng;
this->build_filenames(traj, Params, config, rng);
std::string config, rng, smr;
this->build_filenames(traj, Params, config, smr, rng);
int precision32 = 1;
int tworow = 0;
NerscIO::writeRNGState(sRNG, pRNG, rng);
NerscIO::writeConfiguration<GaugeStats>(U, config, tworow, precision32);
NerscIO::writeConfiguration<GaugeStats>(SmartConfig.get_U(false), config, tworow, precision32);
if ( Params.saveSmeared ) {
NerscIO::writeConfiguration<GaugeStats>(SmartConfig.get_U(true), smr, tworow, precision32);
}
}
};
void CheckpointRestore(int traj, GaugeField &U, GridSerialRNG &sRNG,
GridParallelRNG &pRNG) {
std::string config, rng;
this->build_filenames(traj, Params, config, rng);
std::string config, rng, smr;
this->build_filenames(traj, Params, config, smr, rng );
this->check_filename(rng);
this->check_filename(config);


@ -70,19 +70,37 @@ class ScidacHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
}
}
void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG,
void TrajectoryComplete(int traj,
ConfigurationBase<Field> &SmartConfig,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG) {
if ((traj % Params.saveInterval) == 0) {
std::string config, rng;
this->build_filenames(traj, Params, config, rng);
GridBase *grid = U.Grid();
std::string config, rng,smr;
this->build_filenames(traj, Params, config, smr, rng);
GridBase *grid = SmartConfig.get_U(false).Grid();
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
ScidacWriter _ScidacWriter(grid->IsBoss());
_ScidacWriter.open(config);
_ScidacWriter.writeScidacFieldRecord(U, MData);
_ScidacWriter.close();
std::cout << GridLogMessage << "Written Binary RNG " << rng
<< " checksum " << std::hex
<< nersc_csum <<"/"
<< scidac_csuma <<"/"
<< scidac_csumb
<< std::dec << std::endl;
{
ScidacWriter _ScidacWriter(grid->IsBoss());
_ScidacWriter.open(config);
_ScidacWriter.writeScidacFieldRecord(SmartConfig.get_U(false), MData);
_ScidacWriter.close();
}
if ( Params.saveSmeared ) {
ScidacWriter _ScidacWriter(grid->IsBoss());
_ScidacWriter.open(smr);
_ScidacWriter.writeScidacFieldRecord(SmartConfig.get_U(true), MData);
_ScidacWriter.close();
}
std::cout << GridLogMessage << "Written Scidac Configuration on " << config << std::endl;
}
};


@ -66,6 +66,7 @@ public:
template <class FieldImplementation_, class SmearingPolicy, class RepresentationPolicy>
class Integrator {
protected:
public:
typedef FieldImplementation_ FieldImplementation;
typedef typename FieldImplementation::Field MomentaField; //for readability
typedef typename FieldImplementation::Field Field;
@ -96,7 +97,6 @@ protected:
{
t_P[level] += ep;
update_P(P, U, level, ep);
std::cout << GridLogIntegrator << "[" << level << "] P " << " dt " << ep << " : t_P " << t_P[level] << std::endl;
}
@ -130,28 +130,20 @@ protected:
Field force(U.Grid());
conformable(U.Grid(), Mom.Grid());
Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared);
double start_force = usecond();
std::cout << GridLogMessage << "AuditForce["<<level<<"]["<<a<<"] before"<<std::endl;
as[level].actions.at(a)->deriv_timer_start();
as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta
as[level].actions.at(a)->deriv(Smearer, force); // deriv should NOT include Ta
as[level].actions.at(a)->deriv_timer_stop();
std::cout << GridLogMessage << "AuditForce["<<level<<"]["<<a<<"] after"<<std::endl;
std::cout << GridLogIntegrator << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared << std::endl;
auto name = as[level].actions.at(a)->action_name();
if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force);
force = FieldImplementation::projectForce(force); // Ta for gauge fields
double end_force = usecond();
// DumpSliceNorm("force ",force,Nd-1);
MomFilter->applyFilter(force);
std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<<a <<"] "<<name<<" dt "<<ep<< std::endl;
DumpSliceNorm("force filtered ",force,Nd-1);
Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x])
Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;
@ -377,14 +369,9 @@ public:
auto name = as[level].actions.at(actionID)->action_name();
std::cout << GridLogMessage << "refresh [" << level << "][" << actionID << "] "<<name << std::endl;
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
std::cout << GridLogMessage << "AuditRefresh["<<level<<"]["<<actionID<<"] before"<<std::endl;
as[level].actions.at(actionID)->refresh_timer_start();
as[level].actions.at(actionID)->refresh(Us, sRNG, pRNG);
as[level].actions.at(actionID)->refresh(Smearer, sRNG, pRNG);
as[level].actions.at(actionID)->refresh_timer_stop();
std::cout << GridLogMessage << "AuditRefresh["<<level<<"]["<<actionID<<"] after"<<std::endl;
}
@ -425,10 +412,9 @@ public:
// get gauge field from the SmearingPolicy and
// based on the boolean is_smeared in actionID
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
as[level].actions.at(actionID)->S_timer_start();
Hterm = as[level].actions.at(actionID)->S(Us);
Hterm = as[level].actions.at(actionID)->S(Smearer);
as[level].actions.at(actionID)->S_timer_stop();
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
H += Hterm;
@ -469,12 +455,11 @@ public:
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
// get gauge field from the SmearingPolicy and
// based on the boolean is_smeared in actionID
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
as[level].actions.at(actionID)->S_timer_start();
Hterm = as[level].actions.at(actionID)->Sinitial(Us);
as[level].actions.at(actionID)->S_timer_stop();
as[level].actions.at(actionID)->S_timer_start();
Hterm = as[level].actions.at(actionID)->S(Smearer);
as[level].actions.at(actionID)->S_timer_stop();
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
H += Hterm;

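In formulas, the two diagnostics computed in update_P above are (with \(V = \) U.Grid()->gSites() and step size \(\epsilon\)):
\[ F_{\rm abs} = \sqrt{\frac{1}{V}\sum_x \lVert F(x)\rVert^2}, \qquad
   I_{\rm abs} = F_{\rm abs}\,\epsilon \times \mathrm{HMC\_MOMENTUM\_DENOMINATOR}, \]
i.e. an average per-site force norm and the corresponding momentum impulse per step.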
View File

@ -34,6 +34,13 @@ NAMESPACE_BEGIN(Grid);
template <class Field>
class HmcObservable {
public:
virtual void TrajectoryComplete(int traj,
ConfigurationBase<Field> &SmartConfig,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG)
{
TrajectoryComplete(traj,SmartConfig.get_U(false),sRNG,pRNG); // Unsmeared observable
};
virtual void TrajectoryComplete(int traj,
Field &U,
GridSerialRNG &sRNG,

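With the forwarding default above, an observable only has to implement the plain-field overload; a minimal illustrative sketch (NormObservable is a hypothetical example, not part of the library):
template <class Field>
class NormObservable : public HmcObservable<Field> {
public:
  using HmcObservable<Field>::TrajectoryComplete; // keep the SmartConfig overload visible
  void TrajectoryComplete(int traj, Field &U,
                          GridSerialRNG &sRNG, GridParallelRNG &pRNG) override {
    // receives the unsmeared links via the base-class forwarding
    std::cout << GridLogMessage << "traj " << traj
              << " norm2(U) = " << norm2(U) << std::endl;
  }
};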
View File

@ -42,6 +42,18 @@ public:
// necessary for HmcObservable compatibility
typedef typename Impl::Field Field;
virtual void TrajectoryComplete(int traj,
ConfigurationBase<Field> &SmartConfig,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG)
{
std::cout << GridLogMessage << "+++++++++++++++++++"<<std::endl;
std::cout << GridLogMessage << "Unsmeared plaquette"<<std::endl;
TrajectoryComplete(traj,SmartConfig.get_U(false),sRNG,pRNG); // Unsmeared observable
std::cout << GridLogMessage << "Smeared plaquette"<<std::endl;
TrajectoryComplete(traj,SmartConfig.get_U(true),sRNG,pRNG); // Smeared observable
std::cout << GridLogMessage << "+++++++++++++++++++"<<std::endl;
};
void TrajectoryComplete(int traj,
Field &U,
GridSerialRNG &sRNG,

View File

@ -13,7 +13,7 @@ NAMESPACE_BEGIN(Grid);
* Empty since HMC updates already the fundamental representation
*/
template <int ncolour>
template <int ncolour, class group_name>
class FundamentalRep {
public:
static const int Dimension = ncolour;
@ -21,7 +21,7 @@ public:
// typedef to be used by the Representations class in HMC to get the
// types for the higher representation fields
typedef typename SU<ncolour>::LatticeMatrix LatticeMatrix;
typedef typename GaugeGroup<ncolour,group_name>::LatticeMatrix LatticeMatrix;
typedef LatticeGaugeField LatticeField;
explicit FundamentalRep(GridBase* grid) {} //do nothing
@ -45,7 +45,8 @@ public:
typedef FundamentalRep<Nc> FundamentalRepresentation;
typedef FundamentalRep<Nc,GroupName::SU> FundamentalRepresentation;
typedef FundamentalRep<Nc,GroupName::Sp> SpFundamentalRepresentation;
NAMESPACE_END(Grid);

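A hedged usage sketch: the new Sp typedef is meant to slot into the HMC representation list exactly like its SU counterpart, e.g.
// illustrative: choose the representation set for an Sp(2N) HMC run
typedef Representations<SpFundamentalRepresentation> TheRepresentations;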
View File

@ -20,14 +20,14 @@ NAMESPACE_BEGIN(Grid);
* in the SUnTwoIndex.h file
*/
template <int ncolour, TwoIndexSymmetry S>
template <int ncolour, TwoIndexSymmetry S, class group_name = GroupName::SU>
class TwoIndexRep {
public:
// typedef to be used by the Representations class in HMC to get the
// types for the higher representation fields
typedef typename SU_TwoIndex<ncolour, S>::LatticeTwoIndexMatrix LatticeMatrix;
typedef typename SU_TwoIndex<ncolour, S>::LatticeTwoIndexField LatticeField;
static const int Dimension = ncolour * (ncolour + S) / 2;
typedef typename GaugeGroupTwoIndex<ncolour, S, group_name>::LatticeTwoIndexMatrix LatticeMatrix;
typedef typename GaugeGroupTwoIndex<ncolour, S, group_name>::LatticeTwoIndexField LatticeField;
static const int Dimension = GaugeGroupTwoIndex<ncolour,S,group_name>::Dimension;
static const bool isFundamental = false;
LatticeField U;
@ -43,10 +43,10 @@ public:
U = Zero();
LatticeColourMatrix tmp(Uin.Grid());
Vector<typename SU<ncolour>::Matrix> eij(Dimension);
Vector<typename GaugeGroup<ncolour,group_name>::Matrix> eij(Dimension);
for (int a = 0; a < Dimension; a++)
SU_TwoIndex<ncolour, S>::base(a, eij[a]);
GaugeGroupTwoIndex<ncolour, S, group_name>::base(a, eij[a]);
for (int mu = 0; mu < Nd; mu++) {
auto Uin_mu = peekLorentz(Uin, mu);
@ -71,7 +71,7 @@ public:
out_mu = Zero();
typename SU<ncolour>::LatticeAlgebraVector h(in.Grid());
typename GaugeGroup<ncolour, group_name>::LatticeAlgebraVector h(in.Grid());
projectOnAlgebra(h, in_mu, double(Nc + 2 * S)); // factor T(r)/T(fund)
FundamentalLieAlgebraMatrix(h, out_mu); // apply scale only once
pokeLorentz(out, out_mu, mu);
@ -80,20 +80,23 @@ public:
}
private:
void projectOnAlgebra(typename SU<ncolour>::LatticeAlgebraVector &h_out,
void projectOnAlgebra(typename GaugeGroup<ncolour, group_name>::LatticeAlgebraVector &h_out,
const LatticeMatrix &in, Real scale = 1.0) const {
SU_TwoIndex<ncolour, S>::projectOnAlgebra(h_out, in, scale);
GaugeGroupTwoIndex<ncolour, S,group_name>::projectOnAlgebra(h_out, in, scale);
}
void FundamentalLieAlgebraMatrix(
typename SU<ncolour>::LatticeAlgebraVector &h,
typename SU<ncolour>::LatticeMatrix &out, Real scale = 1.0) const {
SU<ncolour>::FundamentalLieAlgebraMatrix(h, out, scale);
typename GaugeGroup<ncolour, group_name>::LatticeAlgebraVector &h,
typename GaugeGroup<ncolour, group_name>::LatticeMatrix &out, Real scale = 1.0) const {
GaugeGroup<ncolour,group_name>::FundamentalLieAlgebraMatrix(h, out, scale);
}
};
typedef TwoIndexRep<Nc, Symmetric> TwoIndexSymmetricRepresentation;
typedef TwoIndexRep<Nc, AntiSymmetric> TwoIndexAntiSymmetricRepresentation;
typedef TwoIndexRep<Nc, Symmetric, GroupName::SU> TwoIndexSymmetricRepresentation;
typedef TwoIndexRep<Nc, AntiSymmetric, GroupName::SU> TwoIndexAntiSymmetricRepresentation;
typedef TwoIndexRep<Nc, Symmetric, GroupName::Sp> SpTwoIndexSymmetricRepresentation;
typedef TwoIndexRep<Nc, AntiSymmetric, GroupName::Sp> SpTwoIndexAntiSymmetricRepresentation;
NAMESPACE_END(Grid);

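As a compile-time sanity check of the dimension now delegated to GaugeGroupTwoIndex (N(N+S)/2, with the Sp antisymmetric case losing its trace part), a few illustrative assertions:
// illustrative checks, not part of the diff
static_assert(TwoIndexRep<3, Symmetric, GroupName::SU>::Dimension == 6, "SU(3) 2-index S");
static_assert(TwoIndexRep<3, AntiSymmetric, GroupName::SU>::Dimension == 3, "SU(3) 2-index AS");
static_assert(TwoIndexRep<4, AntiSymmetric, GroupName::Sp>::Dimension == 5, "Sp(4) 2-index AS");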
View File

@ -7,26 +7,27 @@
NAMESPACE_BEGIN(Grid);
//trivial class for no smearing
template< class Impl >
class NoSmearing
class NoSmearing : public ConfigurationBase<typename Impl::Field>
{
public:
INHERIT_FIELD_TYPES(Impl);
Field* ThinField;
Field* ThinLinks;
NoSmearing(): ThinField(NULL) {}
NoSmearing(): ThinLinks(NULL) {}
void set_Field(Field& U) { ThinField = &U; }
virtual void set_Field(Field& U) { ThinLinks = &U; }
void smeared_force(Field&) const {}
virtual void smeared_force(Field&) {}
Field& get_SmearedU() { return *ThinField; }
virtual Field& get_SmearedU() { return *ThinLinks; }
Field &get_U(bool smeared = false)
virtual Field &get_U(bool smeared = false)
{
return *ThinField;
return *ThinLinks;
}
};
@ -42,19 +43,24 @@ public:
It stores a list of smeared configurations.
*/
template <class Gimpl>
class SmearedConfiguration
class SmearedConfiguration : public ConfigurationBase<typename Gimpl::Field>
{
public:
INHERIT_GIMPL_TYPES(Gimpl);
private:
protected:
const unsigned int smearingLevels;
Smear_Stout<Gimpl> *StoutSmearing;
std::vector<GaugeField> SmearedSet;
public:
GaugeField* ThinLinks; /* Pointer to the thin links configuration */ // move to base???
protected:
// Member functions
//====================================================================
void fill_smearedSet(GaugeField &U)
// Overridden in masked version
virtual void fill_smearedSet(GaugeField &U)
{
ThinLinks = &U; // attach the smearing routine to the field U
@ -82,9 +88,10 @@ private:
}
}
}
//====================================================================
GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime,
const GaugeField& GaugeK) const
// overridden in masked version
virtual GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime,
const GaugeField& GaugeK) const
{
GridBase* grid = GaugeK.Grid();
GaugeField C(grid), SigmaK(grid), iLambda(grid);
@ -213,8 +220,6 @@ private:
//====================================================================
public:
GaugeField*
ThinLinks; /* Pointer to the thin links configuration */
/* Standard constructor */
SmearedConfiguration(GridCartesian* UGrid, unsigned int Nsmear,
@ -230,7 +235,7 @@ public:
: smearingLevels(0), StoutSmearing(nullptr), SmearedSet(), ThinLinks(NULL) {}
// attach the smeared routines to the thin links U and fill the smeared set
void set_Field(GaugeField &U)
virtual void set_Field(GaugeField &U)
{
double start = usecond();
fill_smearedSet(U);
@ -240,7 +245,7 @@ public:
}
//====================================================================
void smeared_force(GaugeField &SigmaTilde) const
virtual void smeared_force(GaugeField &SigmaTilde)
{
if (smearingLevels > 0)
{
@ -267,14 +272,16 @@ public:
}
double end = usecond();
double time = (end - start)/ 1e3;
std::cout << GridLogMessage << "Smearing force in " << time << " ms" << std::endl;
std::cout << GridLogMessage << " GaugeConfiguration: Smeared Force chain rule took " << time << " ms" << std::endl;
} // if smearingLevels = 0 do nothing
SigmaTilde=Gimpl::projectForce(SigmaTilde); // Ta
}
//====================================================================
GaugeField& get_SmearedU() { return SmearedSet[smearingLevels - 1]; }
virtual GaugeField& get_SmearedU() { return SmearedSet[smearingLevels - 1]; }
GaugeField &get_U(bool smeared = false)
virtual GaugeField &get_U(bool smeared = false)
{
// get the config, thin links by default
if (smeared)

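Both classes now derive from ConfigurationBase<Field>, whose declaration is not part of this diff; a minimal sketch of the interface these overrides imply (assumed shape, inferred from the virtuals above):
// assumed shape of the base interface -- inferred, not quoted from the source
template <class Field>
class ConfigurationBase {
public:
  virtual ~ConfigurationBase() {}
  virtual void set_Field(Field &U) = 0;              // attach the thin links
  virtual void smeared_force(Field &SigmaTilde) = 0; // chain-rule the force
  virtual Field &get_SmearedU() = 0;                 // top of the smeared stack
  virtual Field &get_U(bool smeared = false) = 0;    // thin or smeared links
};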
View File

@ -0,0 +1,813 @@
/*!
@file GaugeConfiguration.h
@brief Declares the GaugeConfiguration class
*/
#pragma once
NAMESPACE_BEGIN(Grid);
/*!
@brief Smeared configuration masked container
Modified for a multi-subset smearing (aka Luscher Flowed HMC)
*/
template <class Gimpl>
class SmearedConfigurationMasked : public SmearedConfiguration<Gimpl>
{
public:
INHERIT_GIMPL_TYPES(Gimpl);
private:
// These live in base class
// const unsigned int smearingLevels;
// Smear_Stout<Gimpl> *StoutSmearing;
// std::vector<GaugeField> SmearedSet;
std::vector<LatticeLorentzComplex> masks;
typedef typename SU3Adjoint::AMatrix AdjMatrix;
typedef typename SU3Adjoint::LatticeAdjMatrix AdjMatrixField;
typedef typename SU3Adjoint::LatticeAdjVector AdjVectorField;
// Adjoint vector to GaugeField force
void InsertForce(GaugeField &Fdet,AdjVectorField &Fdet_nu,int nu)
{
Complex ci(0,1);
GaugeLinkField Fdet_pol(Fdet.Grid());
Fdet_pol=Zero();
for(int e=0;e<8;e++){
ColourMatrix te;
SU3::generator(e, te);
auto tmp=peekColour(Fdet_nu,e);
Fdet_pol=Fdet_pol + ci*tmp*te; // but norm of te is different.. why?
}
pokeLorentz(Fdet, Fdet_pol, nu);
}
void Compute_MpInvJx_dNxxdSy(const GaugeLinkField &PlaqL,const GaugeLinkField &PlaqR, AdjMatrixField MpInvJx,AdjVectorField &Fdet2 )
{
GaugeLinkField UtaU(PlaqL.Grid());
GaugeLinkField D(PlaqL.Grid());
AdjMatrixField Dbc(PlaqL.Grid());
LatticeComplex tmp(PlaqL.Grid());
const int Ngen = SU3Adjoint::Dimension;
Complex ci(0,1);
ColourMatrix ta,tb,tc;
for(int a=0;a<Ngen;a++) {
SU3::generator(a, ta);
// Qlat Tb = 2i Tb^Grid
UtaU= 2.0*ci*adj(PlaqL)*ta*PlaqR;
for(int c=0;c<Ngen;c++) {
SU3::generator(c, tc);
D = Ta( (2.0)*ci*tc *UtaU);
for(int b=0;b<Ngen;b++){
SU3::generator(b, tb);
tmp =-trace(ci*tb*D);
PokeIndex<ColourIndex>(Dbc,tmp,b,c); // Adjoint rep
}
}
tmp = trace(MpInvJx * Dbc);
PokeIndex<ColourIndex>(Fdet2,tmp,a);
}
}
void ComputeNxy(const GaugeLinkField &PlaqL,const GaugeLinkField &PlaqR,AdjMatrixField &NxAd)
{
GaugeLinkField Nx(PlaqL.Grid());
const int Ngen = SU3Adjoint::Dimension;
Complex ci(0,1);
ColourMatrix tb;
ColourMatrix tc;
for(int b=0;b<Ngen;b++) {
SU3::generator(b, tb);
Nx = (2.0)*Ta( adj(PlaqL)*ci*tb * PlaqR );
for(int c=0;c<Ngen;c++) {
SU3::generator(c, tc);
auto tmp =closure( -trace(ci*tc*Nx));
PokeIndex<ColourIndex>(NxAd,tmp,c,b);
}
}
}
void ApplyMask(GaugeField &U,int smr)
{
LatticeComplex tmp(U.Grid());
GaugeLinkField Umu(U.Grid());
for(int mu=0;mu<Nd;mu++){
Umu=PeekIndex<LorentzIndex>(U,mu);
tmp=PeekIndex<LorentzIndex>(masks[smr],mu);
Umu=Umu*tmp;
PokeIndex<LorentzIndex>(U, Umu, mu);
}
}
public:
void logDetJacobianForceLevel(const GaugeField &U, GaugeField &force ,int smr)
{
GridBase* grid = U.Grid();
ColourMatrix tb;
ColourMatrix tc;
ColourMatrix ta;
GaugeField C(grid);
GaugeField Umsk(grid);
std::vector<GaugeLinkField> Umu(Nd,grid);
GaugeLinkField Cmu(grid); // U and staple; C contains factor of epsilon
GaugeLinkField Zx(grid); // U times Staple, contains factor of epsilon
GaugeLinkField Nxx(grid); // Nxx fundamental space
GaugeLinkField Utmp(grid);
GaugeLinkField PlaqL(grid);
GaugeLinkField PlaqR(grid);
const int Ngen = SU3Adjoint::Dimension;
AdjMatrix TRb;
ColourMatrix Ident;
LatticeComplex cplx(grid);
AdjVectorField dJdXe_nMpInv(grid);
AdjVectorField dJdXe_nMpInv_y(grid);
AdjMatrixField MpAd(grid); // Mprime luchang's notes
AdjMatrixField MpAdInv(grid); // Mprime inverse
AdjMatrixField NxxAd(grid); // Nxx in adjoint space
AdjMatrixField JxAd(grid);
AdjMatrixField ZxAd(grid);
AdjMatrixField mZxAd(grid);
AdjMatrixField X(grid);
Complex ci(0,1);
RealD t0 = usecond();
Ident = ComplexD(1.0);
for(int d=0;d<Nd;d++){
Umu[d] = peekLorentz(U, d);
}
int mu= (smr/2) %Nd;
////////////////////////////////////////////////////////////////////////////////
// Mask the gauge field
////////////////////////////////////////////////////////////////////////////////
auto mask=PeekIndex<LorentzIndex>(masks[smr],mu); // the cb mask
Umsk = U;
ApplyMask(Umsk,smr);
Utmp = peekLorentz(Umsk,mu);
////////////////////////////////////////////////////////////////////////////////
// Retrieve the eps/rho parameter(s) -- they could all differ, but so far they do not
////////////////////////////////////////////////////////////////////////////////
double rho=this->StoutSmearing->SmearRho[1];
int idx=0;
for(int mu=0;mu<4;mu++){
for(int nu=0;nu<4;nu++){
if ( mu!=nu) assert(this->StoutSmearing->SmearRho[idx]==rho);
else assert(this->StoutSmearing->SmearRho[idx]==0.0);
idx++;
}}
//////////////////////////////////////////////////////////////////
// Assemble the N matrix
//////////////////////////////////////////////////////////////////
// Computes ALL the staples -- could compute one only and do it here
RealD time;
time=-usecond();
this->StoutSmearing->BaseSmear(C, U);
Cmu = peekLorentz(C, mu);
//////////////////////////////////////////////////////////////////
// Assemble Luscher exp diff map J matrix
//////////////////////////////////////////////////////////////////
// Ta so Z lives in the Lie algebra
Zx = Ta(Cmu * adj(Umu[mu]));
time+=usecond();
std::cout << GridLogMessage << "Z took "<<time<< " us"<<std::endl;
time=-usecond();
// Move Z to the Adjoint Rep == make_adjoint_representation
ZxAd = Zero();
for(int b=0;b<8;b++) {
// Adj group sets traceless antihermitian T's -- Guido, really????
SU3::generator(b, tb); // Fund group sets traceless hermitian T's
SU3Adjoint::generator(b,TRb);
TRb=-TRb;
cplx = 2.0*trace(ci*tb*Zx); // my convention 1/2 delta ba
ZxAd = ZxAd + cplx * TRb; // is this right? YES - Guido used Anti herm Ta's and with bloody wrong sign.
}
time+=usecond();
std::cout << GridLogMessage << "ZxAd took "<<time<< " us"<<std::endl;
//////////////////////////////////////
// J(x) = 1 + Sum_k=1..N (-Zac)^k/(k+1)!
//////////////////////////////////////
time=-usecond();
X=1.0;
JxAd = X;
mZxAd = (-1.0)*ZxAd;
RealD kpfac = 1;
for(int k=1;k<12;k++){
X=X*mZxAd;
kpfac = kpfac /(k+1);
JxAd = JxAd + X * kpfac;
}
time+=usecond();
std::cout << GridLogMessage << "Jx took "<<time<< " us"<<std::endl;
//////////////////////////////////////
// dJ(x)/dxe
//////////////////////////////////////
time=-usecond();
std::vector<AdjMatrixField> dJdX; dJdX.resize(8,grid);
AdjMatrixField tbXn(grid);
AdjMatrixField sumXtbX(grid);
AdjMatrixField t2(grid);
AdjMatrixField dt2(grid);
AdjMatrixField t3(grid);
AdjMatrixField dt3(grid);
AdjMatrixField aunit(grid);
for(int b=0;b<8;b++){
aunit = ComplexD(1.0);
SU3Adjoint::generator(b, TRb); //dt2
X = (-1.0)*ZxAd;
t2 = X;
dt2 = TRb;
for (int j = 20; j > 1; --j) {
t3 = t2*(1.0 / (j + 1)) + aunit;
dt3 = dt2*(1.0 / (j + 1));
t2 = X * t3;
dt2 = TRb * t3 + X * dt3;
}
dJdX[b] = -dt2;
}
time+=usecond();
std::cout << GridLogMessage << "dJx took "<<time<< " us"<<std::endl;
/////////////////////////////////////////////////////////////////
// Mask Umu for this link
/////////////////////////////////////////////////////////////////
time=-usecond();
PlaqL = Ident;
PlaqR = Utmp*adj(Cmu);
ComputeNxy(PlaqL,PlaqR,NxxAd);
time+=usecond();
std::cout << GridLogMessage << "ComputeNxy took "<<time<< " us"<<std::endl;
////////////////////////////
// Mab
////////////////////////////
MpAd = Complex(1.0,0.0);
MpAd = MpAd - JxAd * NxxAd;
/////////////////////////
// invert the 8x8
/////////////////////////
time=-usecond();
MpAdInv = Inverse(MpAd);
time+=usecond();
std::cout << GridLogMessage << "MpAdInv took "<<time<< " us"<<std::endl;
RealD t3a = usecond();
/////////////////////////////////////////////////////////////////
// Nxx Mp^-1
/////////////////////////////////////////////////////////////////
AdjVectorField FdetV(grid);
AdjVectorField Fdet1_nu(grid);
AdjVectorField Fdet2_nu(grid);
AdjVectorField Fdet2_mu(grid);
AdjVectorField Fdet1_mu(grid);
AdjMatrixField nMpInv(grid);
nMpInv= NxxAd *MpAdInv;
AdjMatrixField MpInvJx(grid);
AdjMatrixField MpInvJx_nu(grid);
MpInvJx = (-1.0)*MpAdInv * JxAd;// rho is on the plaq factor
Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx,FdetV);
Fdet2_mu=FdetV;
Fdet1_mu=Zero();
for(int e =0 ; e<8 ; e++){
LatticeComplexD tr(grid);
ColourMatrix te;
SU3::generator(e, te);
tr = trace(dJdX[e] * nMpInv);
pokeColour(dJdXe_nMpInv,tr,e);
}
///////////////////////////////
// Mask it off
///////////////////////////////
auto tmp=PeekIndex<LorentzIndex>(masks[smr],mu);
dJdXe_nMpInv = dJdXe_nMpInv*tmp;
// dJdXe_nMpInv needs to multiply:
// Nxx_mu (site local) (1)
// Nxy_mu one site forward in each nu direction (3)
// Nxy_mu one site backward in each nu direction (3)
// Nxy_nu 0,0 ; +mu,0; 0,-nu; +mu-nu [ 3x4 = 12]
// 19 terms.
AdjMatrixField Nxy(grid);
GaugeField Fdet1(grid);
GaugeField Fdet2(grid);
GaugeLinkField Fdet_pol(grid); // one polarisation
RealD t4 = usecond();
for(int nu=0;nu<Nd;nu++){
if (nu!=mu) {
///////////////// +ve nu /////////////////
// __
// | |
// x== // nu polarisation -- clockwise
time=-usecond();
PlaqL=Ident;
PlaqR=(-rho)*Gimpl::CovShiftForward(Umu[nu], nu,
Gimpl::CovShiftForward(Umu[mu], mu,
Gimpl::CovShiftBackward(Umu[nu], nu,
Gimpl::CovShiftIdentityBackward(Utmp, mu))));
time+=usecond();
std::cout << GridLogMessage << "PlaqLR took "<<time<< " us"<<std::endl;
time=-usecond();
dJdXe_nMpInv_y = dJdXe_nMpInv;
ComputeNxy(PlaqL,PlaqR,Nxy);
Fdet1_nu = transpose(Nxy)*dJdXe_nMpInv_y;
time+=usecond();
std::cout << GridLogMessage << "ComputeNxy (occurs 6x) took "<<time<< " us"<<std::endl;
time=-usecond();
PlaqR=(-1.0)*PlaqR;
Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx,FdetV);
Fdet2_nu = FdetV;
time+=usecond();
std::cout << GridLogMessage << "Compute_MpInvJx_dNxxSy (occurs 6x) took "<<time<< " us"<<std::endl;
// x==
// | |
// .__| // nu polarisation -- anticlockwise
PlaqR=(rho)*Gimpl::CovShiftForward(Umu[nu], nu,
Gimpl::CovShiftBackward(Umu[mu], mu,
Gimpl::CovShiftIdentityBackward(Umu[nu], nu)));
PlaqL=Gimpl::CovShiftIdentityBackward(Utmp, mu);
dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,mu,-1);
ComputeNxy(PlaqL, PlaqR,Nxy);
Fdet1_nu = Fdet1_nu+transpose(Nxy)*dJdXe_nMpInv_y;
MpInvJx_nu = Cshift(MpInvJx,mu,-1);
Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV);
Fdet2_nu = Fdet2_nu+FdetV;
///////////////// -ve nu /////////////////
// __
// | |
// x== // nu polarisation -- clockwise
PlaqL=(rho)* Gimpl::CovShiftForward(Umu[mu], mu,
Gimpl::CovShiftForward(Umu[nu], nu,
Gimpl::CovShiftIdentityBackward(Utmp, mu)));
PlaqR = Gimpl::CovShiftIdentityForward(Umu[nu], nu);
dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,nu,1);
ComputeNxy(PlaqL,PlaqR,Nxy);
Fdet1_nu = Fdet1_nu + transpose(Nxy)*dJdXe_nMpInv_y;
MpInvJx_nu = Cshift(MpInvJx,nu,1);
Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV);
Fdet2_nu = Fdet2_nu+FdetV;
// x==
// | |
// |__| // nu polarisation
PlaqL=(-rho)*Gimpl::CovShiftForward(Umu[nu], nu,
Gimpl::CovShiftIdentityBackward(Utmp, mu));
PlaqR=Gimpl::CovShiftBackward(Umu[mu], mu,
Gimpl::CovShiftIdentityForward(Umu[nu], nu));
dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,mu,-1);
dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv_y,nu,1);
ComputeNxy(PlaqL,PlaqR,Nxy);
Fdet1_nu = Fdet1_nu + transpose(Nxy)*dJdXe_nMpInv_y;
MpInvJx_nu = Cshift(MpInvJx,mu,-1);
MpInvJx_nu = Cshift(MpInvJx_nu,nu,1);
Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV);
Fdet2_nu = Fdet2_nu+FdetV;
/////////////////////////////////////////////////////////////////////
// Set up the determinant force contribution in 3x3 algebra basis
/////////////////////////////////////////////////////////////////////
InsertForce(Fdet1,Fdet1_nu,nu);
InsertForce(Fdet2,Fdet2_nu,nu);
//////////////////////////////////////////////////
// Parallel direction terms
//////////////////////////////////////////////////
// __
// | "
// |__"x // mu polarisation
PlaqL=(-rho)*Gimpl::CovShiftForward(Umu[mu], mu,
Gimpl::CovShiftBackward(Umu[nu], nu,
Gimpl::CovShiftIdentityBackward(Utmp, mu)));
PlaqR=Gimpl::CovShiftIdentityBackward(Umu[nu], nu);
dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,nu,-1);
ComputeNxy(PlaqL,PlaqR,Nxy);
Fdet1_mu = Fdet1_mu + transpose(Nxy)*dJdXe_nMpInv_y;
MpInvJx_nu = Cshift(MpInvJx,nu,-1);
Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV);
Fdet2_mu = Fdet2_mu+FdetV;
// __
// " |
// x__| // mu polarisation
PlaqL=(-rho)*Gimpl::CovShiftForward(Umu[mu], mu,
Gimpl::CovShiftForward(Umu[nu], nu,
Gimpl::CovShiftIdentityBackward(Utmp, mu)));
PlaqR=Gimpl::CovShiftIdentityForward(Umu[nu], nu);
dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,nu,1);
ComputeNxy(PlaqL,PlaqR,Nxy);
Fdet1_mu = Fdet1_mu + transpose(Nxy)*dJdXe_nMpInv_y;
MpInvJx_nu = Cshift(MpInvJx,nu,1);
Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV);
Fdet2_mu = Fdet2_mu+FdetV;
}
}
RealD t5 = usecond();
Fdet1_mu = Fdet1_mu + transpose(NxxAd)*dJdXe_nMpInv;
InsertForce(Fdet1,Fdet1_mu,mu);
InsertForce(Fdet2,Fdet2_mu,mu);
force= (-0.5)*( Fdet1 + Fdet2);
RealD t1 = usecond();
std::cout << GridLogMessage << " logDetJacobianForce level took "<<t1-t0<<" us "<<std::endl;
std::cout << GridLogMessage << " logDetJacobianForce t3-t0 "<<t3a-t0<<" us "<<std::endl;
std::cout << GridLogMessage << " logDetJacobianForce t4-t3 dJdXe_nMpInv "<<t4-t3a<<" us "<<std::endl;
std::cout << GridLogMessage << " logDetJacobianForce t5-t4 mu nu loop "<<t5-t4<<" us "<<std::endl;
std::cout << GridLogMessage << " logDetJacobianForce t1-t5 "<<t1-t5<<" us "<<std::endl;
std::cout << GridLogMessage << " logDetJacobianForce level took "<<t1-t0<<" us "<<std::endl;
}
RealD logDetJacobianLevel(const GaugeField &U,int smr)
{
GridBase* grid = U.Grid();
GaugeField C(grid);
GaugeLinkField Nb(grid);
GaugeLinkField Z(grid);
GaugeLinkField Umu(grid), Cmu(grid);
ColourMatrix Tb;
ColourMatrix Tc;
typedef typename SU3Adjoint::AMatrix AdjMatrix;
typedef typename SU3Adjoint::LatticeAdjMatrix AdjMatrixField;
typedef typename SU3Adjoint::LatticeAdjVector AdjVectorField;
const int Ngen = SU3Adjoint::Dimension;
AdjMatrix TRb;
LatticeComplex cplx(grid);
AdjVectorField AlgV(grid);
AdjMatrixField Mab(grid);
AdjMatrixField Ncb(grid);
AdjMatrixField Jac(grid);
AdjMatrixField Zac(grid);
AdjMatrixField mZac(grid);
AdjMatrixField X(grid);
int mu= (smr/2) %Nd;
auto mask=PeekIndex<LorentzIndex>(masks[smr],mu); // the cb mask
//////////////////////////////////////////////////////////////////
// Assemble the N matrix
//////////////////////////////////////////////////////////////////
// Computes ALL the staples -- could compute one only here
this->StoutSmearing->BaseSmear(C, U);
Cmu = peekLorentz(C, mu);
Umu = peekLorentz(U, mu);
Complex ci(0,1);
for(int b=0;b<Ngen;b++) {
SU3::generator(b, Tb);
// Qlat Tb = 2i Tb^Grid
Nb = (2.0)*Ta( ci*Tb * Umu * adj(Cmu));
for(int c=0;c<Ngen;c++) {
SU3::generator(c, Tc);
auto tmp = -trace(ci*Tc*Nb); // Luchang's norm: (2Tc) (2Td) N^db = -2 delta cd N^db // - was important
PokeIndex<ColourIndex>(Ncb,tmp,c,b);
}
}
//////////////////////////////////////////////////////////////////
// Assemble Luscher exp diff map J matrix
//////////////////////////////////////////////////////////////////
// Ta so Z lives in the Lie algebra
Z = Ta(Cmu * adj(Umu));
// Move Z to the Adjoint Rep == make_adjoint_representation
Zac = Zero();
for(int b=0;b<8;b++) {
// Adj group sets traceless antihermitian T's -- Guido, really????
// Is the mapping of these the same? Same structure constants
// Might never have been checked.
SU3::generator(b, Tb); // Fund group sets traceless hermitian T's
SU3Adjoint::generator(b,TRb);
TRb=-TRb;
cplx = 2.0*trace(ci*Tb*Z); // my convention 1/2 delta ba
Zac = Zac + cplx * TRb; // is this right? YES - Guido used Anti herm Ta's and with bloody wrong sign.
}
//////////////////////////////////////
// J(x) = 1 + Sum_k=1..N (-Zac)^k/(k+1)!
//////////////////////////////////////
X=1.0;
Jac = X;
mZac = (-1.0)*Zac;
RealD kpfac = 1;
for(int k=1;k<12;k++){
X=X*mZac;
kpfac = kpfac /(k+1);
Jac = Jac + X * kpfac;
}
////////////////////////////
// Mab
////////////////////////////
Mab = Complex(1.0,0.0);
Mab = Mab - Jac * Ncb;
////////////////////////////
// det
////////////////////////////
LatticeComplex det(grid);
det = Determinant(Mab);
////////////////////////////
// ln det
////////////////////////////
LatticeComplex ln_det(grid);
ln_det = log(det);
////////////////////////////
// Masked sum
////////////////////////////
ln_det = ln_det * mask;
Complex result = sum(ln_det);
return result.real();
}
public:
RealD logDetJacobian(void)
{
RealD ln_det = 0;
if (this->smearingLevels > 0)
{
double start = usecond();
for (int ismr = this->smearingLevels - 1; ismr > 0; --ismr) {
ln_det+= logDetJacobianLevel(this->get_smeared_conf(ismr-1),ismr);
}
ln_det +=logDetJacobianLevel(*(this->ThinLinks),0);
double end = usecond();
double time = (end - start)/ 1e3;
std::cout << GridLogMessage << "GaugeConfigurationMasked: logDetJacobian took " << time << " ms" << std::endl;
}
return ln_det;
}
void logDetJacobianForce(GaugeField &force)
{
force =Zero();
GaugeField force_det(force.Grid());
if (this->smearingLevels > 0)
{
double start = usecond();
GaugeLinkField tmp_mu(force.Grid());
for (int ismr = this->smearingLevels - 1; ismr > 0; --ismr) {
// remove U in UdSdU...
for (int mu = 0; mu < Nd; mu++) {
tmp_mu = adj(peekLorentz(this->get_smeared_conf(ismr), mu)) * peekLorentz(force, mu);
pokeLorentz(force, tmp_mu, mu);
}
// Propagate existing force
force = this->AnalyticSmearedForce(force, this->get_smeared_conf(ismr - 1), ismr);
// Add back U in UdSdU...
for (int mu = 0; mu < Nd; mu++) {
tmp_mu = peekLorentz(this->get_smeared_conf(ismr - 1), mu) * peekLorentz(force, mu);
pokeLorentz(force, tmp_mu, mu);
}
// Get this levels determinant force
force_det = Zero();
logDetJacobianForceLevel(this->get_smeared_conf(ismr-1),force_det,ismr);
// Sum the contributions
force = force + force_det;
}
// remove U in UdSdU...
for (int mu = 0; mu < Nd; mu++) {
tmp_mu = adj(peekLorentz(this->get_smeared_conf(0), mu)) * peekLorentz(force, mu);
pokeLorentz(force, tmp_mu, mu);
}
force = this->AnalyticSmearedForce(force, *this->ThinLinks,0);
for (int mu = 0; mu < Nd; mu++) {
tmp_mu = peekLorentz(*this->ThinLinks, mu) * peekLorentz(force, mu);
pokeLorentz(force, tmp_mu, mu);
}
force_det = Zero();
logDetJacobianForceLevel(*this->ThinLinks,force_det,0);
force = force + force_det;
force=Ta(force); // Ta
double end = usecond();
double time = (end - start)/ 1e3;
std::cout << GridLogMessage << "GaugeConfigurationMasked: lnDetJacobianForce took " << time << " ms" << std::endl;
} // if smearingLevels = 0 do nothing
}
private:
//====================================================================
// Override base class here to mask it
virtual void fill_smearedSet(GaugeField &U)
{
this->ThinLinks = &U; // attach the smearing routine to the field U
// check the pointer is not null
if (this->ThinLinks == NULL)
std::cout << GridLogError << "[SmearedConfigurationMasked] Error in ThinLinks pointer\n";
if (this->smearingLevels > 0)
{
std::cout << GridLogMessage << "[SmearedConfigurationMasked] Filling SmearedSet\n";
GaugeField previous_u(this->ThinLinks->Grid());
GaugeField smeared_A(this->ThinLinks->Grid());
GaugeField smeared_B(this->ThinLinks->Grid());
previous_u = *this->ThinLinks;
double start = usecond();
for (int smearLvl = 0; smearLvl < this->smearingLevels; ++smearLvl)
{
this->StoutSmearing->smear(smeared_A, previous_u);
ApplyMask(smeared_A,smearLvl);
smeared_B = previous_u;
ApplyMask(smeared_B,smearLvl);
// Replace only the masked portion
this->SmearedSet[smearLvl] = previous_u-smeared_B + smeared_A;
previous_u = this->SmearedSet[smearLvl];
// For debug purposes
RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(previous_u);
std::cout << GridLogMessage << "[SmearedConfigurationMasked] smeared Plaq: " << impl_plaq << std::endl;
}
double end = usecond();
double time = (end - start)/ 1e3;
std::cout << GridLogMessage << "GaugeConfigurationMasked: Link smearing took " << time << " ms" << std::endl;
}
}
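// In formulas: with mask m_k selecting one (mu, checkerboard) subset per level,
//   U_{k+1} = (1 - m_k) * U_k + m_k * Stout(U_k)
// so each level flows only the masked links -- the Luscher-flow style update
// whose Jacobian the logDet routines above evaluate.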
//====================================================================
// Override base to add masking
virtual GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime,
const GaugeField& GaugeK,int level)
{
GridBase* grid = GaugeK.Grid();
GaugeField C(grid), SigmaK(grid), iLambda(grid);
GaugeField SigmaKPrimeA(grid);
GaugeField SigmaKPrimeB(grid);
GaugeLinkField iLambda_mu(grid);
GaugeLinkField iQ(grid), e_iQ(grid);
GaugeLinkField SigmaKPrime_mu(grid);
GaugeLinkField GaugeKmu(grid), Cmu(grid);
this->StoutSmearing->BaseSmear(C, GaugeK);
SigmaK = Zero();
iLambda = Zero();
SigmaKPrimeA = SigmaKPrime;
ApplyMask(SigmaKPrimeA,level);
SigmaKPrimeB = SigmaKPrime - SigmaKPrimeA;
// Could get away with computing only one polarisation here
// int mu= (smr/2) %Nd;
// SigmaKprime_A has only one component
for (int mu = 0; mu < Nd; mu++)
{
Cmu = peekLorentz(C, mu);
GaugeKmu = peekLorentz(GaugeK, mu);
SigmaKPrime_mu = peekLorentz(SigmaKPrimeA, mu);
iQ = Ta(Cmu * adj(GaugeKmu));
this->set_iLambda(iLambda_mu, e_iQ, iQ, SigmaKPrime_mu, GaugeKmu);
pokeLorentz(SigmaK, SigmaKPrime_mu * e_iQ + adj(Cmu) * iLambda_mu, mu);
pokeLorentz(iLambda, iLambda_mu, mu);
}
this->StoutSmearing->derivative(SigmaK, iLambda,GaugeK); // derivative of SmearBase
////////////////////////////////////////////////////////////////////////////////////
// propagate the rest of the force as identity map, just add back
////////////////////////////////////////////////////////////////////////////////////
SigmaK = SigmaK+SigmaKPrimeB;
return SigmaK;
}
public:
/* Standard constructor */
SmearedConfigurationMasked(GridCartesian* _UGrid, unsigned int Nsmear, Smear_Stout<Gimpl>& Stout)
: SmearedConfiguration<Gimpl>(_UGrid, Nsmear,Stout)
{
assert(Nsmear%(2*Nd)==0); // Or multiply by 8??
// was resized in base class
assert(this->SmearedSet.size()==Nsmear);
GridRedBlackCartesian * UrbGrid;
UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(_UGrid);
LatticeComplex one(_UGrid); one = ComplexD(1.0,0.0);
LatticeComplex tmp(_UGrid);
for (unsigned int i = 0; i < this->smearingLevels; ++i) {
masks.emplace_back(_UGrid);
int mu= (i/2) %Nd;
int cb= (i%2);
LatticeComplex tmpcb(UrbGrid);
masks[i]=Zero();
////////////////////
// Setup the mask
////////////////////
tmp = Zero();
pickCheckerboard(cb,tmpcb,one);
setCheckerboard(tmp,tmpcb);
PokeIndex<LorentzIndex>(masks[i],tmp, mu);
}
delete UrbGrid;
}
virtual void smeared_force(GaugeField &SigmaTilde)
{
if (this->smearingLevels > 0)
{
double start = usecond();
GaugeField force = SigmaTilde; // actually = U*SigmaTilde
GaugeLinkField tmp_mu(SigmaTilde.Grid());
// Remove U from UdSdU
for (int mu = 0; mu < Nd; mu++)
{
// to get just SigmaTilde
tmp_mu = adj(peekLorentz(this->SmearedSet[this->smearingLevels - 1], mu)) * peekLorentz(force, mu);
pokeLorentz(force, tmp_mu, mu);
}
for (int ismr = this->smearingLevels - 1; ismr > 0; --ismr) {
force = this->AnalyticSmearedForce(force, this->get_smeared_conf(ismr - 1),ismr);
}
force = this->AnalyticSmearedForce(force, *this->ThinLinks,0);
// Add U to UdSdU
for (int mu = 0; mu < Nd; mu++)
{
tmp_mu = peekLorentz(*this->ThinLinks, mu) * peekLorentz(force, mu);
pokeLorentz(SigmaTilde, tmp_mu, mu);
}
double end = usecond();
double time = (end - start)/ 1e3;
std::cout << GridLogMessage << " GaugeConfigurationMasked: Smeared Force chain rule took " << time << " ms" << std::endl;
} // if smearingLevels = 0 do nothing
SigmaTilde=Gimpl::projectForce(SigmaTilde); // Ta
}
};
NAMESPACE_END(Grid);

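The series assembled in logDetJacobianLevel and logDetJacobianForceLevel is the truncated differential of the exponential map. In formulas, with \(Z\) in the adjoint representation and \(N\) the Nxy matrix:
\[ J(Z) = \sum_{k=0}^{\infty} \frac{(-Z)^k}{(k+1)!} = \frac{1 - e^{-Z}}{Z}, \qquad
   M = 1 - J(Z)\,N, \qquad
   S_{\rm det} = -\sum_{x\,\in\,{\rm mask}} \ln \det M(x), \]
which is the quantity JacobianAction (next file) feeds back into the HMC Hamiltonian.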
View File

@ -0,0 +1,87 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/gauge/JacobianAction.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////////////////////////
// Jacobian Action ..
////////////////////////////////////////////////////////////////////////
template <class Gimpl>
class JacobianAction : public Action<typename Gimpl::GaugeField> {
public:
INHERIT_GIMPL_TYPES(Gimpl);
SmearedConfigurationMasked<Gimpl> * smearer;
/////////////////////////// constructors
explicit JacobianAction(SmearedConfigurationMasked<Gimpl> * _smearer ) { smearer=_smearer;};
virtual std::string action_name() {return "JacobianAction";}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "[JacobianAction] " << std::endl;
return sstream.str();
}
//////////////////////////////////
// Usual cases are not used
//////////////////////////////////
virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG){ assert(0);};
virtual RealD S(const GaugeField &U) { assert(0); }
virtual void deriv(const GaugeField &U, GaugeField &dSdU) { assert(0); }
//////////////////////////////////
// Functions of smart configs only
//////////////////////////////////
virtual void refresh(ConfigurationBase<GaugeField> & U, GridSerialRNG &sRNG, GridParallelRNG& pRNG)
{
return;
}
virtual RealD S(ConfigurationBase<GaugeField>& U)
{
// det M = e^{ - ( - logDetM) }
assert( &U == smearer );
return -smearer->logDetJacobian();
}
virtual RealD Sinitial(ConfigurationBase<GaugeField>& U)
{
return S(U);
}
virtual void deriv(ConfigurationBase<GaugeField>& U, GaugeField& dSdU)
{
assert( &U == smearer );
smearer->logDetJacobianForce(dSdU);
}
private:
};
NAMESPACE_END(Grid);

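A hedged wiring sketch (gauge implementation, rho and grids are placeholders; the action must be handed the same masked smearer that owns the configuration, since S() asserts on pointer identity):
// illustrative HMC wiring; PeriodicGimplR, rho and UGrid are assumed set up
Smear_Stout<PeriodicGimplR> Stout(rho);
SmearedConfigurationMasked<PeriodicGimplR> SmartConfig(UGrid, 2*Nd, Stout);
JacobianAction<PeriodicGimplR> Jacobian(&SmartConfig);
// S(U) contributes -ln det M, so the Jacobian of the flowed update enters
// the Boltzmann weight alongside the gauge and fermion actions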
View File

@ -40,7 +40,9 @@ template <class Gimpl>
class Smear_Stout : public Smear<Gimpl> {
private:
int OrthogDim = -1;
public:
const std::vector<double> SmearRho;
private:
// Smear<Gimpl>* ownership semantics:
// Smear<Gimpl>* passed in to constructor are owned by caller, so we don't delete them here
// Smear<Gimpl>* created within constructor need to be deleted as part of the destructor

View File

@ -37,13 +37,14 @@ NAMESPACE_BEGIN(Grid);
// Make these members of an Impl class for BC's.
namespace PeriodicBC {
//Out(x) = Link(x)*field(x+mu)
template<class covariant,class gauge> Lattice<covariant> CovShiftForward(const Lattice<gauge> &Link,
int mu,
const Lattice<covariant> &field)
{
return Link*Cshift(field,mu,1);// moves towards negative mu
}
//Out(x) = Link^dag(x-mu)*field(x-mu)
template<class covariant,class gauge> Lattice<covariant> CovShiftBackward(const Lattice<gauge> &Link,
int mu,
const Lattice<covariant> &field)
@ -52,19 +53,19 @@ namespace PeriodicBC {
tmp = adj(Link)*field;
return Cshift(tmp,mu,-1);// moves towards positive mu
}
//Out(x) = Link^dag(x-mu)
template<class gauge> Lattice<gauge>
CovShiftIdentityBackward(const Lattice<gauge> &Link, int mu)
{
return Cshift(adj(Link), mu, -1);
}
//Out(x) = Link(x)
template<class gauge> Lattice<gauge>
CovShiftIdentityForward(const Lattice<gauge> &Link, int mu)
{
return Link;
}
//Link(x) = Link(x+mu)
template<class gauge> Lattice<gauge>
ShiftStaple(const Lattice<gauge> &Link, int mu)
{

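These primitives compose into Wilson loops. For orientation, a directed plaquette in these conventions (a sketch; dirPlaq is a hypothetical helper name):
// P_{mu,nu}(x) = U_mu(x) U_nu(x+mu) U_mu^dag(x+nu) U_nu^dag(x)
template <class Gimpl>
typename Gimpl::GaugeLinkField dirPlaq(const std::vector<typename Gimpl::GaugeLinkField> &U,
                                       int mu, int nu) {
  return Gimpl::CovShiftForward(U[mu], mu,
           Gimpl::CovShiftForward(U[nu], nu,
             Gimpl::CovShiftBackward(U[mu], mu,
               Gimpl::CovShiftIdentityBackward(U[nu], nu))));
}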
Grid/qcd/utils/GaugeGroup.h (new file, 470 lines)
View File

@ -0,0 +1,470 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/utils/GaugeGroup.h
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef QCD_UTIL_GAUGEGROUP_H
#define QCD_UTIL_GAUGEGROUP_H
// Important detail: nvcc requires all template parameters to have names.
// This is the only reason why the second template parameter has a name.
#define ONLY_IF_SU \
typename dummy_name = group_name, \
typename named_dummy = std::enable_if_t < \
std::is_same<dummy_name, group_name>::value && \
is_su<dummy_name>::value >
#define ONLY_IF_Sp \
typename dummy_name = group_name, \
typename named_dummy = std::enable_if_t < \
std::is_same<dummy_name, group_name>::value && \
is_sp<dummy_name>::value >
NAMESPACE_BEGIN(Grid);
namespace GroupName {
class SU {};
class Sp {};
} // namespace GroupName
template <typename group_name>
struct is_su {
static const bool value = false;
};
template <>
struct is_su<GroupName::SU> {
static const bool value = true;
};
template <typename group_name>
struct is_sp {
static const bool value = false;
};
template <>
struct is_sp<GroupName::Sp> {
static const bool value = true;
};
template <typename group_name>
constexpr int compute_adjoint_dimension(int ncolour);
template <>
constexpr int compute_adjoint_dimension<GroupName::SU>(int ncolour) {
return ncolour * ncolour - 1;
}
template <>
constexpr int compute_adjoint_dimension<GroupName::Sp>(int ncolour) {
return ncolour / 2 * (ncolour + 1);
}
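// Illustrative compile-time checks (not part of the original source):
// SU(N) has N^2-1 generators, Sp(2n) has n(2n+1); here ncolour = 2n for Sp.
static_assert(compute_adjoint_dimension<GroupName::SU>(3) == 8,
              "SU(3): 3*3-1 = 8 generators");
static_assert(compute_adjoint_dimension<GroupName::Sp>(4) == 10,
              "Sp(4): 4/2*(4+1) = 10 generators");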
template <int ncolour, class group_name>
class GaugeGroup {
public:
static const int Dimension = ncolour;
static const int AdjointDimension =
compute_adjoint_dimension<group_name>(ncolour);
static const int AlgebraDimension =
compute_adjoint_dimension<group_name>(ncolour);
template <typename vtype>
using iSU2Matrix = iScalar<iScalar<iMatrix<vtype, 2> > >;
template <typename vtype>
using iGroupMatrix = iScalar<iScalar<iMatrix<vtype, ncolour> > >;
template <typename vtype>
using iAlgebraVector = iScalar<iScalar<iVector<vtype, AdjointDimension> > >;
static int su2subgroups(void) { return su2subgroups(group_name()); }
//////////////////////////////////////////////////////////////////////////////////////////////////
// Types can be accessed as SU<2>::Matrix , SU<2>::vSUnMatrix,
// SU<2>::LatticeMatrix etc...
//////////////////////////////////////////////////////////////////////////////////////////////////
typedef iGroupMatrix<Complex> Matrix;
typedef iGroupMatrix<ComplexF> MatrixF;
typedef iGroupMatrix<ComplexD> MatrixD;
typedef iGroupMatrix<vComplex> vMatrix;
typedef iGroupMatrix<vComplexF> vMatrixF;
typedef iGroupMatrix<vComplexD> vMatrixD;
// For the projectors to the algebra
// these should be real...
// keeping complex for consistency with the SIMD vector types
typedef iAlgebraVector<Complex> AlgebraVector;
typedef iAlgebraVector<ComplexF> AlgebraVectorF;
typedef iAlgebraVector<ComplexD> AlgebraVectorD;
typedef iAlgebraVector<vComplex> vAlgebraVector;
typedef iAlgebraVector<vComplexF> vAlgebraVectorF;
typedef iAlgebraVector<vComplexD> vAlgebraVectorD;
typedef Lattice<vMatrix> LatticeMatrix;
typedef Lattice<vMatrixF> LatticeMatrixF;
typedef Lattice<vMatrixD> LatticeMatrixD;
typedef Lattice<vAlgebraVector> LatticeAlgebraVector;
typedef Lattice<vAlgebraVectorF> LatticeAlgebraVectorF;
typedef Lattice<vAlgebraVectorD> LatticeAlgebraVectorD;
typedef iSU2Matrix<Complex> SU2Matrix;
typedef iSU2Matrix<ComplexF> SU2MatrixF;
typedef iSU2Matrix<ComplexD> SU2MatrixD;
typedef iSU2Matrix<vComplex> vSU2Matrix;
typedef iSU2Matrix<vComplexF> vSU2MatrixF;
typedef iSU2Matrix<vComplexD> vSU2MatrixD;
typedef Lattice<vSU2Matrix> LatticeSU2Matrix;
typedef Lattice<vSU2MatrixF> LatticeSU2MatrixF;
typedef Lattice<vSU2MatrixD> LatticeSU2MatrixD;
// Private implementation details are specified in the following files:
// Grid/qcd/utils/SUn.impl.h
// Grid/qcd/utils/Sp2n.impl.h
// The public part of the interface follows below and refers to these
// private member functions.
#include <Grid/qcd/utils/SUn.impl.h>
#include <Grid/qcd/utils/Sp2n.impl.h>
public:
template <class cplx>
static void generator(int lieIndex, iGroupMatrix<cplx> &ta) {
return generator(lieIndex, ta, group_name());
}
static void su2SubGroupIndex(int &i1, int &i2, int su2_index) {
return su2SubGroupIndex(i1, i2, su2_index, group_name());
}
static void testGenerators(void) { testGenerators(group_name()); }
static void printGenerators(void) {
for (int gen = 0; gen < AlgebraDimension; gen++) {
Matrix ta;
generator(gen, ta);
std::cout << GridLogMessage << "Nc = " << ncolour << " t_" << gen
<< std::endl;
std::cout << GridLogMessage << ta << std::endl;
}
}
template <typename LatticeMatrixType>
static void LieRandomize(GridParallelRNG &pRNG, LatticeMatrixType &out,
double scale = 1.0) {
GridBase *grid = out.Grid();
typedef typename LatticeMatrixType::vector_type vector_type;
typedef iSinglet<vector_type> vTComplexType;
typedef Lattice<vTComplexType> LatticeComplexType;
typedef typename GridTypeMapper<
typename LatticeMatrixType::vector_object>::scalar_object MatrixType;
LatticeComplexType ca(grid);
LatticeMatrixType lie(grid);
LatticeMatrixType la(grid);
ComplexD ci(0.0, scale);
MatrixType ta;
lie = Zero();
for (int a = 0; a < AlgebraDimension; a++) {
random(pRNG, ca);
ca = (ca + conjugate(ca)) * 0.5;
ca = ca - 0.5;
generator(a, ta);
la = ci * ca * ta;
lie = lie + la; // e^{i la ta}
}
taExp(lie, out);
}
static void GaussianFundamentalLieAlgebraMatrix(GridParallelRNG &pRNG,
LatticeMatrix &out,
Real scale = 1.0) {
GridBase *grid = out.Grid();
LatticeReal ca(grid);
LatticeMatrix la(grid);
Complex ci(0.0, scale);
Matrix ta;
out = Zero();
for (int a = 0; a < AlgebraDimension; a++) {
gaussian(pRNG, ca);
generator(a, ta);
la = toComplex(ca) * ta;
out += la;
}
out *= ci;
}
static void FundamentalLieAlgebraMatrix(const LatticeAlgebraVector &h,
LatticeMatrix &out,
Real scale = 1.0) {
conformable(h, out);
GridBase *grid = out.Grid();
LatticeMatrix la(grid);
Matrix ta;
out = Zero();
for (int a = 0; a < AlgebraDimension; a++) {
generator(a, ta);
la = peekColour(h, a) * timesI(ta) * scale;
out += la;
}
}
// Projects onto the algebra components of a lattice matrix (of dimension
// ncolour*ncolour - 1); inverse operation: FundamentalLieAlgebraMatrix
static void projectOnAlgebra(LatticeAlgebraVector &h_out,
const LatticeMatrix &in, Real scale = 1.0) {
conformable(h_out, in);
h_out = Zero();
Matrix Ta;
for (int a = 0; a < AlgebraDimension; a++) {
generator(a, Ta);
pokeColour(h_out, -2.0 * (trace(timesI(Ta) * in)) * scale, a);
}
}
template <class vtype>
accelerator_inline static iScalar<vtype> ProjectOnGeneralGroup(const iScalar<vtype> &r) {
return ProjectOnGeneralGroup(r, group_name());
}
template <class vtype, int N>
accelerator_inline static iVector<vtype,N> ProjectOnGeneralGroup(const iVector<vtype,N> &r) {
return ProjectOnGeneralGroup(r, group_name());
}
template <class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
accelerator_inline static iMatrix<vtype,N> ProjectOnGeneralGroup(const iMatrix<vtype,N> &arg) {
return ProjectOnGeneralGroup(arg, group_name());
}
template <int N,class vComplex_t> // Projects on the general groups U(N), Sp(2N)xZ2 i.e. determinant is allowed a complex phase.
static void ProjectOnGeneralGroup(Lattice<iVector<iScalar<iMatrix<vComplex_t, N> >, Nd> > &U) {
for (int mu = 0; mu < Nd; mu++) {
auto Umu = PeekIndex<LorentzIndex>(U, mu);
Umu = ProjectOnGeneralGroup(Umu);
}
}
template <int N,class vComplex_t>
static Lattice<iScalar<iScalar<iMatrix<vComplex_t, N> > > > ProjectOnGeneralGroup(const Lattice<iScalar<iScalar<iMatrix<vComplex_t, N> > > > &Umu) {
return ProjectOnGeneralGroup(Umu, group_name());
}
template <int N,class vComplex_t> // Projects on SU(N), Sp(2N), with unit determinant, by first projecting on general group and then enforcing unit determinant
static void ProjectOnSpecialGroup(Lattice<iScalar<iScalar<iMatrix<vComplex_t, N> > > > &Umu) {
Umu = ProjectOnGeneralGroup(Umu);
auto det = Determinant(Umu);
det = conjugate(det);
for (int i = 0; i < N; i++) {
auto element = PeekIndex<ColourIndex>(Umu, N - 1, i);
element = element * det;
PokeIndex<ColourIndex>(Umu, element, N - 1, i);
}
}
template <int N,class vComplex_t> // reunitarise, resymplectify... previously ProjectSUn
static void ProjectOnSpecialGroup(Lattice<iVector<iScalar<iMatrix<vComplex_t, N> >, Nd> > &U) {
// Reunitarise
for (int mu = 0; mu < Nd; mu++) {
auto Umu = PeekIndex<LorentzIndex>(U, mu);
ProjectOnSpecialGroup(Umu);
PokeIndex<LorentzIndex>(U, Umu, mu);
}
}
template <typename GaugeField>
static void HotConfiguration(GridParallelRNG &pRNG, GaugeField &out) {
typedef typename GaugeField::vector_type vector_type;
typedef iGroupMatrix<vector_type> vMatrixType;
typedef Lattice<vMatrixType> LatticeMatrixType;
LatticeMatrixType Umu(out.Grid());
LatticeMatrixType tmp(out.Grid());
for (int mu = 0; mu < Nd; mu++) {
// LieRandomize(pRNG, Umu, 1.0);
// PokeIndex<LorentzIndex>(out, Umu, mu);
gaussian(pRNG,Umu);
tmp = Ta(Umu);
taExp(tmp,Umu);
ProjectOnSpecialGroup(Umu);
// ProjectSUn(Umu);
PokeIndex<LorentzIndex>(out, Umu, mu);
}
}
template <typename GaugeField>
static void TepidConfiguration(GridParallelRNG &pRNG, GaugeField &out) {
typedef typename GaugeField::vector_type vector_type;
typedef iGroupMatrix<vector_type> vMatrixType;
typedef Lattice<vMatrixType> LatticeMatrixType;
LatticeMatrixType Umu(out.Grid());
for (int mu = 0; mu < Nd; mu++) {
LieRandomize(pRNG, Umu, 0.01);
PokeIndex<LorentzIndex>(out, Umu, mu);
}
}
template <typename GaugeField>
static void ColdConfiguration(GaugeField &out) {
typedef typename GaugeField::vector_type vector_type;
typedef iGroupMatrix<vector_type> vMatrixType;
typedef Lattice<vMatrixType> LatticeMatrixType;
LatticeMatrixType Umu(out.Grid());
Umu = 1.0;
for (int mu = 0; mu < Nd; mu++) {
PokeIndex<LorentzIndex>(out, Umu, mu);
}
}
template <typename GaugeField>
static void ColdConfiguration(GridParallelRNG &pRNG, GaugeField &out) {
ColdConfiguration(out);
}
template <typename LatticeMatrixType>
static void taProj(const LatticeMatrixType &in, LatticeMatrixType &out) {
taProj(in, out, group_name());
}
template <typename LatticeMatrixType>
static void taExp(const LatticeMatrixType &x, LatticeMatrixType &ex) {
typedef typename LatticeMatrixType::scalar_type ComplexType;
LatticeMatrixType xn(x.Grid());
RealD nfac = 1.0;
xn = x;
ex = xn + ComplexType(1.0); // 1+x
// Do a 12th order exponentiation
for (int i = 2; i <= 12; ++i) {
nfac = nfac / RealD(i); // 1/2, 1/2.3 ...
xn = xn * x; // x2, x3,x4....
ex = ex + xn * nfac; // x2/2!, x3/3!....
}
}
};
template <int ncolour>
using SU = GaugeGroup<ncolour, GroupName::SU>;
template <int ncolour>
using Sp = GaugeGroup<ncolour, GroupName::Sp>;
typedef SU<2> SU2;
typedef SU<3> SU3;
typedef SU<4> SU4;
typedef SU<5> SU5;
typedef SU<Nc> FundamentalMatrices;
typedef Sp<2> Sp2;
typedef Sp<4> Sp4;
typedef Sp<6> Sp6;
typedef Sp<8> Sp8;
template <int N,class vComplex_t>
static void ProjectSUn(Lattice<iScalar<iScalar<iMatrix<vComplex_t, N> > > > &Umu)
{
GaugeGroup<N,GroupName::SU>::ProjectOnSpecialGroup(Umu);
}
template <int N,class vComplex_t>
static void ProjectSUn(Lattice<iVector<iScalar<iMatrix<vComplex_t, N> >,Nd> > &U)
{
GaugeGroup<N,GroupName::SU>::ProjectOnSpecialGroup(U);
}
template <int N,class vComplex_t>
static void ProjectSpn(Lattice<iScalar<iScalar<iMatrix<vComplex_t, N> > > > &Umu)
{
GaugeGroup<N,GroupName::Sp>::ProjectOnSpecialGroup(Umu);
}
template <int N,class vComplex_t>
static void ProjectSpn(Lattice<iVector<iScalar<iMatrix<vComplex_t, N> >,Nd> > &U)
{
GaugeGroup<N,GroupName::Sp>::ProjectOnSpecialGroup(U);
}
// Explicit specialisation for SU(3).
static void ProjectSU3(Lattice<iScalar<iScalar<iMatrix<vComplexD, 3> > > > &Umu)
{
GridBase *grid = Umu.Grid();
const int x = 0;
const int y = 1;
const int z = 2;
// Reunitarise
Umu = ProjectOnGroup(Umu);
autoView(Umu_v, Umu, CpuWrite);
thread_for(ss, grid->oSites(), {
auto cm = Umu_v[ss];
cm()()(2, x) = adj(cm()()(0, y) * cm()()(1, z) -
cm()()(0, z) * cm()()(1, y)); // x= yz-zy
cm()()(2, y) = adj(cm()()(0, z) * cm()()(1, x) -
cm()()(0, x) * cm()()(1, z)); // y= zx-xz
cm()()(2, z) = adj(cm()()(0, x) * cm()()(1, y) -
cm()()(0, y) * cm()()(1, x)); // z= xy-yx
Umu_v[ss] = cm;
});
}
static void ProjectSU3(Lattice<iVector<iScalar<iMatrix<vComplexD, 3> >, Nd> > &U)
{
GridBase *grid = U.Grid();
// Reunitarise
for (int mu = 0; mu < Nd; mu++) {
auto Umu = PeekIndex<LorentzIndex>(U, mu);
Umu = ProjectOnGroup(Umu);
ProjectSU3(Umu);
PokeIndex<LorentzIndex>(U, Umu, mu);
}
}
NAMESPACE_END(Grid);
#endif

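Call sites then read the same for either group; a brief illustrative usage (grid and pRNG assumed initialised):
// illustrative usage of the unified gauge-group interface
LatticeGaugeField U(grid);
SU<Nc>::HotConfiguration(pRNG, U); // random start, projected onto SU(Nc)
ProjectSUn(U);                     // reunitarise via the generic special-group path
Sp<4>::testGenerators();           // verify the Sp(4) generator conventions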
View File

@ -0,0 +1,371 @@
////////////////////////////////////////////////////////////////////////
//
// * Two index representation generators
//
// * Normalisation for the fundamental generators:
// trace ta tb = 1/2 delta_ab = T_F delta_ab
// T_F = 1/2 for SU(N) groups
//
//
// base for NxN two index (anti-symmetric) matrices
// normalized to 1 (d_ij is the Kronecker delta)
//
// (e^(ij))_{kl} = 1 / sqrt(2) (d_ik d_jl +/- d_jk d_il)
//
// Then the generators are written as
//
// (iT_a)^(ij)(lk) = i * ( tr[e^(ij)^dag e^(lk) T^transp_a] +
//                         tr[e^(lk) e^(ij)^dag T_a] )
//
//
////////////////////////////////////////////////////////////////////////
// Authors: David Preti, Guido Cossu
#ifndef QCD_UTIL_GAUGEGROUPTWOINDEX_H
#define QCD_UTIL_GAUGEGROUPTWOINDEX_H
NAMESPACE_BEGIN(Grid);
enum TwoIndexSymmetry { Symmetric = 1, AntiSymmetric = -1 };
constexpr inline Real delta(int a, int b) { return (a == b) ? 1.0 : 0.0; }
namespace detail {
template <class cplx, int nc, TwoIndexSymmetry S>
struct baseOffDiagonalSpHelper;
template <class cplx, int nc>
struct baseOffDiagonalSpHelper<cplx, nc, AntiSymmetric> {
static const int ngroup = nc / 2;
static void baseOffDiagonalSp(int i, int j, iScalar<iScalar<iMatrix<cplx, nc> > > &eij) {
eij = Zero();
RealD tmp;
if ((i == ngroup + j) && (1 <= j) && (j < ngroup)) {
for (int k = 0; k < j+1; k++) {
if (k < j) {
tmp = 1 / sqrt(j * (j + 1));
eij()()(k, k + ngroup) = tmp;
eij()()(k + ngroup, k) = -tmp;
}
if (k == j) {
tmp = -j / sqrt(j * (j + 1));
eij()()(k, k + ngroup) = tmp;
eij()()(k + ngroup, k) = -tmp;
}
}
}
else if (i != ngroup + j) {
for (int k = 0; k < nc; k++)
for (int l = 0; l < nc; l++) {
eij()()(l, k) =
delta(i, k) * delta(j, l) - delta(j, k) * delta(i, l);
}
}
RealD nrm = 1. / std::sqrt(2.0);
eij = eij * nrm;
}
};
template <class cplx, int nc>
struct baseOffDiagonalSpHelper<cplx, nc, Symmetric> {
static void baseOffDiagonalSp(int i, int j, iScalar<iScalar<iMatrix<cplx, nc> > > &eij) {
eij = Zero();
for (int k = 0; k < nc; k++)
for (int l = 0; l < nc; l++)
eij()()(l, k) =
delta(i, k) * delta(j, l) + delta(j, k) * delta(i, l);
RealD nrm = 1. / std::sqrt(2.0);
eij = eij * nrm;
}
};
} // closing detail namespace
template <int ncolour, TwoIndexSymmetry S, class group_name>
class GaugeGroupTwoIndex : public GaugeGroup<ncolour, group_name> {
public:
// The chosen convention is that we are taking ncolour to be N in SU<N> but 2N
// in Sp(2N). ngroup is equal to N for SU but 2N/2 = N for Sp(2N).
static_assert(std::is_same<group_name, GroupName::SU>::value or
std::is_same<group_name, GroupName::Sp>::value,
"ngroup is only implemented for SU and Sp currently.");
static const int ngroup =
std::is_same<group_name, GroupName::SU>::value ? ncolour : ncolour / 2;
static const int Dimension =
(ncolour * (ncolour + S) / 2) + (std::is_same<group_name, GroupName::Sp>::value ? (S - 1) / 2 : 0);
static const int DimensionAS =
(ncolour * (ncolour - 1) / 2) + (std::is_same<group_name, GroupName::Sp>::value ? (- 1) : 0);
static const int DimensionS =
ncolour * (ncolour + 1) / 2;
static const int NumGenerators =
GaugeGroup<ncolour, group_name>::AlgebraDimension;
template <typename vtype>
using iGroupTwoIndexMatrix = iScalar<iScalar<iMatrix<vtype, Dimension> > >;
typedef iGroupTwoIndexMatrix<Complex> TIMatrix;
typedef iGroupTwoIndexMatrix<ComplexF> TIMatrixF;
typedef iGroupTwoIndexMatrix<ComplexD> TIMatrixD;
typedef iGroupTwoIndexMatrix<vComplex> vTIMatrix;
typedef iGroupTwoIndexMatrix<vComplexF> vTIMatrixF;
typedef iGroupTwoIndexMatrix<vComplexD> vTIMatrixD;
typedef Lattice<vTIMatrix> LatticeTwoIndexMatrix;
typedef Lattice<vTIMatrixF> LatticeTwoIndexMatrixF;
typedef Lattice<vTIMatrixD> LatticeTwoIndexMatrixD;
typedef Lattice<iVector<iScalar<iMatrix<vComplex, Dimension> >, Nd> >
LatticeTwoIndexField;
typedef Lattice<iVector<iScalar<iMatrix<vComplexF, Dimension> >, Nd> >
LatticeTwoIndexFieldF;
typedef Lattice<iVector<iScalar<iMatrix<vComplexD, Dimension> >, Nd> >
LatticeTwoIndexFieldD;
template <typename vtype>
using iGroupMatrix = iScalar<iScalar<iMatrix<vtype, ncolour> > >;
typedef iGroupMatrix<Complex> Matrix;
typedef iGroupMatrix<ComplexF> MatrixF;
typedef iGroupMatrix<ComplexD> MatrixD;
private:
template <class cplx>
static void baseDiagonal(int Index, iGroupMatrix<cplx> &eij) {
eij = Zero();
eij()()(Index - ncolour * (ncolour - 1) / 2,
Index - ncolour * (ncolour - 1) / 2) = 1.0;
}
template <class cplx>
static void baseOffDiagonal(int i, int j, iGroupMatrix<cplx> &eij, GroupName::SU) {
eij = Zero();
for (int k = 0; k < ncolour; k++)
for (int l = 0; l < ncolour; l++)
eij()()(l, k) =
delta(i, k) * delta(j, l) + S * delta(j, k) * delta(i, l);
RealD nrm = 1. / std::sqrt(2.0);
eij = eij * nrm;
}
template <class cplx>
static void baseOffDiagonal(int i, int j, iGroupMatrix<cplx> &eij, GroupName::Sp) {
detail::baseOffDiagonalSpHelper<cplx, ncolour, S>::baseOffDiagonalSp(i, j, eij);
}
public:
template <class cplx>
static void base(int Index, iGroupMatrix<cplx> &eij) {
// returns (e)^(ij)_{kl} necessary for the change of basis U_F -> U_R
assert(Index < Dimension);
eij = Zero();
// for the linearisation of the two indices
static int a[ncolour * (ncolour - 1) / 2][2]; // store the a <-> i,j
static bool filled = false;
if (!filled) {
int counter = 0;
for (int i = 1; i < ncolour; i++) {
for (int j = 0; j < i; j++) {
if (std::is_same<group_name, GroupName::Sp>::value)
{
if (j==0 && i==ngroup+j && S==-1) {
//std::cout << "skipping" << std::endl; // for Sp2n this vanishes identically.
j = j+1;
}
}
a[counter][0] = i;
a[counter][1] = j;
counter++;
}
}
filled = true;
}
if (Index < ncolour*ncolour - DimensionS)
{
baseOffDiagonal(a[Index][0], a[Index][1], eij, group_name());
} else {
baseDiagonal(Index, eij);
}
}
static void printBase(void) {
for (int gen = 0; gen < Dimension; gen++) {
Matrix tmp;
base(gen, tmp);
std::cout << GridLogMessage << "Nc = " << ncolour << " t_" << gen
<< std::endl;
std::cout << GridLogMessage << tmp << std::endl;
}
}
template <class cplx>
static void generator(int Index, iGroupTwoIndexMatrix<cplx> &i2indTa) {
Vector<iGroupMatrix<cplx> > ta(NumGenerators);
Vector<iGroupMatrix<cplx> > eij(Dimension);
iGroupMatrix<cplx> tmp;
for (int a = 0; a < NumGenerators; a++)
GaugeGroup<ncolour, group_name>::generator(a, ta[a]);
for (int a = 0; a < Dimension; a++) base(a, eij[a]);
for (int a = 0; a < Dimension; a++) {
tmp = transpose(eij[a]*ta[Index]) + transpose(eij[a]) * ta[Index];
for (int b = 0; b < Dimension; b++) {
Complex iTr = TensorRemove(timesI(trace(tmp * eij[b])));
i2indTa()()(a, b) = iTr;
}
}
}
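// Added remark: the basis e^(ij) produced by base() is real, so the dagger
// in the header formula reduces to transpose(). By cyclicity of the trace,
//   Tr[tmp * e^(lk)] = tr[e^(ij)^T e^(lk) T^transp_a] + tr[e^(lk) e^(ij)^T T_a],
// which is exactly the bracket in (iT_a)^(ij)(lk) above.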
static void printGenerators(void) {
for (int gen = 0; gen < NumGenerators; gen++) {
TIMatrix i2indTa;
generator(gen, i2indTa);
std::cout << GridLogMessage << "Nc = " << ncolour << " t_" << gen
<< std::endl;
std::cout << GridLogMessage << i2indTa << std::endl;
}
}
static void testGenerators(void) {
TIMatrix i2indTa, i2indTb;
std::cout << GridLogMessage << "2IndexRep - Checking if traceless"
<< std::endl;
for (int a = 0; a < NumGenerators; a++) {
generator(a, i2indTa);
std::cout << GridLogMessage << a << std::endl;
assert(norm2(trace(i2indTa)) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage << "2IndexRep - Checking if antihermitean"
<< std::endl;
for (int a = 0; a < NumGenerators; a++) {
generator(a, i2indTa);
std::cout << GridLogMessage << a << std::endl;
assert(norm2(adj(i2indTa) + i2indTa) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage
<< "2IndexRep - Checking Tr[Ta*Tb]=delta(a,b)*(N +- 2)/2"
<< std::endl;
for (int a = 0; a < NumGenerators; a++) {
for (int b = 0; b < NumGenerators; b++) {
generator(a, i2indTa);
generator(b, i2indTb);
// generator returns iTa, so we need a minus sign here
Complex Tr = -TensorRemove(trace(i2indTa * i2indTb));
std::cout << GridLogMessage << "a=" << a << "b=" << b << "Tr=" << Tr
<< std::endl;
if (a == b) {
assert(real(Tr) - ((ncolour + S * 2) * 0.5) < 1e-8);
} else {
assert(real(Tr) < 1e-8);
}
assert(imag(Tr) < 1e-8);
}
}
std::cout << GridLogMessage << std::endl;
}
static void TwoIndexLieAlgebraMatrix(
const typename GaugeGroup<ncolour, group_name>::LatticeAlgebraVector &h,
LatticeTwoIndexMatrix &out, Real scale = 1.0) {
conformable(h, out);
GridBase *grid = out.Grid();
LatticeTwoIndexMatrix la(grid);
TIMatrix i2indTa;
out = Zero();
for (int a = 0; a < NumGenerators; a++) {
generator(a, i2indTa);
la = peekColour(h, a) * i2indTa;
out += la;
}
out *= scale;
}
// Projects the algebra components (one per generator)
// of a lattice two-index matrix
static void projectOnAlgebra(
typename GaugeGroup<ncolour, group_name>::LatticeAlgebraVector &h_out,
const LatticeTwoIndexMatrix &in, Real scale = 1.0) {
conformable(h_out, in);
h_out = Zero();
TIMatrix i2indTa;
Real coefficient = -2.0 / (ncolour + 2 * S) * scale;
// 2/(Nc +/- 2) for the normalization of the trace in the two index rep
for (int a = 0; a < NumGenerators; a++) {
generator(a, i2indTa);
pokeColour(h_out, real(trace(i2indTa * in)) * coefficient, a);
}
}
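// Added remark: the coefficient follows from the normalisation verified in
// testGenerators(), Tr[T_a T_b] = delta_ab (Nc + 2S)/2. Since generator()
// returns i T_a, feeding in = sum_b h_b (i T_b) gives
//   Re Tr[(i T_a) in] = -h_a (Nc + 2S)/2,
// and the prefactor -2/(Nc + 2S) recovers h_a exactly.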
// a projector that keeps the generators stored to avoid the overhead of
// recomputing them
static void projector(
typename GaugeGroup<ncolour, group_name>::LatticeAlgebraVector &h_out,
const LatticeTwoIndexMatrix &in, Real scale = 1.0) {
conformable(h_out, in);
// to store the generators
static std::vector<TIMatrix> i2indTa(NumGenerators);
h_out = Zero();
static bool precalculated = false;
if (!precalculated) {
precalculated = true;
for (int a = 0; a < NumGenerators; a++) generator(a, i2indTa[a]);
}
Real coefficient =
-2.0 / (ncolour + 2 * S) * scale; // 2/(Nc +/- 2) for the normalization
// of the trace in the two index rep
for (int a = 0; a < NumGenerators; a++) {
auto tmp = real(trace(i2indTa[a] * in)) * coefficient;
pokeColour(h_out, tmp, a);
}
}
};
template <int ncolour, TwoIndexSymmetry S>
using SU_TwoIndex = GaugeGroupTwoIndex<ncolour, S, GroupName::SU>;
// Some useful type names
typedef SU_TwoIndex<Nc, Symmetric> TwoIndexSymmMatrices;
typedef SU_TwoIndex<Nc, AntiSymmetric> TwoIndexAntiSymmMatrices;
typedef SU_TwoIndex<2, Symmetric> SU2TwoIndexSymm;
typedef SU_TwoIndex<3, Symmetric> SU3TwoIndexSymm;
typedef SU_TwoIndex<4, Symmetric> SU4TwoIndexSymm;
typedef SU_TwoIndex<5, Symmetric> SU5TwoIndexSymm;
typedef SU_TwoIndex<2, AntiSymmetric> SU2TwoIndexAntiSymm;
typedef SU_TwoIndex<3, AntiSymmetric> SU3TwoIndexAntiSymm;
typedef SU_TwoIndex<4, AntiSymmetric> SU4TwoIndexAntiSymm;
typedef SU_TwoIndex<5, AntiSymmetric> SU5TwoIndexAntiSymm;
template <int ncolour, TwoIndexSymmetry S>
using Sp_TwoIndex = GaugeGroupTwoIndex<ncolour, S, GroupName::Sp>;
typedef Sp_TwoIndex<Nc, Symmetric> SpTwoIndexSymmMatrices;
typedef Sp_TwoIndex<Nc, AntiSymmetric> SpTwoIndexAntiSymmMatrices;
typedef Sp_TwoIndex<2, Symmetric> Sp2TwoIndexSymm;
typedef Sp_TwoIndex<4, Symmetric> Sp4TwoIndexSymm;
typedef Sp_TwoIndex<4, AntiSymmetric> Sp4TwoIndexAntiSymm;
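// A minimal usage sketch (hypothetical standalone test program, not part of
// this header; Grid_init/Grid_finalize are the standard Grid entry points):
//
//   #include <Grid/Grid.h>
//   using namespace Grid;
//   int main(int argc, char **argv) {
//     Grid_init(&argc, &argv);
//     SU_TwoIndex<Nc, Symmetric>::printBase();      // the e^(ij) basis
//     SU_TwoIndex<Nc, Symmetric>::testGenerators(); // trace/hermiticity checks
//     Sp_TwoIndex<4, AntiSymmetric>::testGenerators();
//     Grid_finalize();
//     return 0;
//   }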
NAMESPACE_END(Grid);
#endif

Grid/qcd/utils/SUn.h
@ -1,892 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/utils/SUn.h
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef QCD_UTIL_SUN_H
#define QCD_UTIL_SUN_H
NAMESPACE_BEGIN(Grid);
template <int ncolour>
class SU {
public:
static const int Dimension = ncolour;
static const int AdjointDimension = ncolour * ncolour - 1;
static int su2subgroups(void) { return (ncolour * (ncolour - 1)) / 2; }
template <typename vtype>
using iSUnMatrix = iScalar<iScalar<iMatrix<vtype, ncolour> > >;
template <typename vtype>
using iSU2Matrix = iScalar<iScalar<iMatrix<vtype, 2> > >;
template <typename vtype>
using iSUnAlgebraVector =
iScalar<iScalar<iVector<vtype, AdjointDimension> > >;
//////////////////////////////////////////////////////////////////////////////////////////////////
// Types can be accessed as SU<2>::Matrix , SU<2>::vSUnMatrix,
// SU<2>::LatticeMatrix etc...
//////////////////////////////////////////////////////////////////////////////////////////////////
typedef iSUnMatrix<Complex> Matrix;
typedef iSUnMatrix<ComplexF> MatrixF;
typedef iSUnMatrix<ComplexD> MatrixD;
typedef iSUnMatrix<vComplex> vMatrix;
typedef iSUnMatrix<vComplexF> vMatrixF;
typedef iSUnMatrix<vComplexD> vMatrixD;
// For the projectors to the algebra
// these should be real...
// keeping complex for consistency with the SIMD vector types
typedef iSUnAlgebraVector<Complex> AlgebraVector;
typedef iSUnAlgebraVector<ComplexF> AlgebraVectorF;
typedef iSUnAlgebraVector<ComplexD> AlgebraVectorD;
typedef iSUnAlgebraVector<vComplex> vAlgebraVector;
typedef iSUnAlgebraVector<vComplexF> vAlgebraVectorF;
typedef iSUnAlgebraVector<vComplexD> vAlgebraVectorD;
typedef Lattice<vMatrix> LatticeMatrix;
typedef Lattice<vMatrixF> LatticeMatrixF;
typedef Lattice<vMatrixD> LatticeMatrixD;
typedef Lattice<vAlgebraVector> LatticeAlgebraVector;
typedef Lattice<vAlgebraVectorF> LatticeAlgebraVectorF;
typedef Lattice<vAlgebraVectorD> LatticeAlgebraVectorD;
typedef iSU2Matrix<Complex> SU2Matrix;
typedef iSU2Matrix<ComplexF> SU2MatrixF;
typedef iSU2Matrix<ComplexD> SU2MatrixD;
typedef iSU2Matrix<vComplex> vSU2Matrix;
typedef iSU2Matrix<vComplexF> vSU2MatrixF;
typedef iSU2Matrix<vComplexD> vSU2MatrixD;
typedef Lattice<vSU2Matrix> LatticeSU2Matrix;
typedef Lattice<vSU2MatrixF> LatticeSU2MatrixF;
typedef Lattice<vSU2MatrixD> LatticeSU2MatrixD;
////////////////////////////////////////////////////////////////////////
// There are N^2-1 generators for SU(N).
//
// We take a traceless hermitian generator basis as follows
//
// * Normalisation: trace ta tb = 1/2 delta_ab = T_F delta_ab
// T_F = 1/2 for SU(N) groups
//
// * Off diagonal
// - pairs of rows i1,i2 behaving like pauli matrices sigma_x, sigma_y
//
// - there are (Nc-1-i1) slots for i2 on each row [ x 0 x ]
// direct count off each row
//
// - Sum of all pairs is Nc(Nc-1)/2: proof arithmetic series
//
// (Nc-1) + (Nc-2)+... 1 ==> Nc*(Nc-1)/2
// 1+ 2+ + + Nc-1
//
// - There are 2 x Nc (Nc-1)/ 2 of these = Nc^2 - Nc
//
// - We enumerate the row-col pairs.
// - for each row col pair there is a (sigma_x) and a (sigma_y) like
// generator
//
//
// t^a_ij = { a in 0 .. Nc(Nc-1)/2 - 1 } => 1/2 (delta_{i,i1} delta_{j,i2} +
// delta_{i,i2} delta_{j,i1})
// t^a_ij = { a in Nc(Nc-1)/2 ... Nc(Nc-1) - 1 } => i/2 (delta_{i,i1}
// delta_{j,i2} - delta_{i,i2} delta_{j,i1})
//
// * Diagonal; must be traceless and normalised
// - Sequence is
// N (1,-1,0,0...)
// N (1, 1,-2,0...)
// N (1, 1, 1,-3,0...)
// N (1, 1, 1, 1,-4,0...)
//
// where the normalisation 1/2 = N^2 (1 + ... + 1 + m^2) = N^2 m(m+1) fixes
// N = 1/sqrt(2 m(m+1)) for the m-th diagonal generator
// NB this gives the famous SU(3) result for lambda_8
//
// N= sqrt(1/2 . 1/6 ) = 1/2 . 1/sqrt(3)
//
// ( 1 )
// ( 1 ) / sqrt(3) /2 = 1/2 lambda_8
// ( -2)
//
////////////////////////////////////////////////////////////////////////
template <class cplx>
static void generator(int lieIndex, iSUnMatrix<cplx> &ta) {
// map lie index to which type of generator
int diagIndex;
int su2Index;
int sigxy;
int NNm1 = ncolour * (ncolour - 1);
if (lieIndex >= NNm1) {
diagIndex = lieIndex - NNm1;
generatorDiagonal(diagIndex, ta);
return;
}
sigxy = lieIndex & 0x1; // even or odd
su2Index = lieIndex >> 1;
if (sigxy)
generatorSigmaY(su2Index, ta);
else
generatorSigmaX(su2Index, ta);
}
template <class cplx>
static void generatorSigmaY(int su2Index, iSUnMatrix<cplx> &ta) {
ta = Zero();
int i1, i2;
su2SubGroupIndex(i1, i2, su2Index);
ta()()(i1, i2) = 1.0;
ta()()(i2, i1) = 1.0;
ta = ta * 0.5;
}
template <class cplx>
static void generatorSigmaX(int su2Index, iSUnMatrix<cplx> &ta) {
ta = Zero();
cplx i(0.0, 1.0);
int i1, i2;
su2SubGroupIndex(i1, i2, su2Index);
ta()()(i1, i2) = i;
ta()()(i2, i1) = -i;
ta = ta * 0.5;
}
template <class cplx>
static void generatorDiagonal(int diagIndex, iSUnMatrix<cplx> &ta) {
// diag ({1, 1, ..., 1}(k-times), -k, 0, 0, ...)
ta = Zero();
int k = diagIndex + 1; // diagIndex starts from 0
for (int i = 0; i <= diagIndex; i++) { // k iterations
ta()()(i, i) = 1.0;
}
ta()()(k, k) = -k; // indexing starts from 0
RealD nrm = 1.0 / std::sqrt(2.0 * k * (k + 1));
ta = ta * nrm;
}
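// Worked example for SU(3) (added for illustration): NNm1 = 6, so lieIndex
// 0,1 act on the su2 pair (i1,i2) = (0,1), 2,3 on (0,2) and 4,5 on (1,2),
// even indices giving the imaginary antisymmetric generator and odd indices
// the real symmetric one; lieIndex 6 gives diag(1,-1,0)/2 (lambda_3-like)
// and lieIndex 7 gives diag(1,1,-2)/sqrt(12) = lambda_8/2.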
////////////////////////////////////////////////////////////////////////
// Map a su2 subgroup number to the pair of rows that are non zero
////////////////////////////////////////////////////////////////////////
static void su2SubGroupIndex(int &i1, int &i2, int su2_index) {
assert((su2_index >= 0) && (su2_index < (ncolour * (ncolour - 1)) / 2));
int spare = su2_index;
for (i1 = 0; spare >= (ncolour - 1 - i1); i1++) {
spare = spare - (ncolour - 1 - i1); // remove the Nc-1-i1 terms
}
i2 = i1 + 1 + spare;
}
//////////////////////////////////////////////////////////////////////////////////////////
// Pull out a subgroup and project on to real coeffs x pauli basis
//////////////////////////////////////////////////////////////////////////////////////////
template <class vcplx>
static void su2Extract(Lattice<iSinglet<vcplx> > &Determinant,
Lattice<iSU2Matrix<vcplx> > &subgroup,
const Lattice<iSUnMatrix<vcplx> > &source,
int su2_index) {
GridBase *grid(source.Grid());
conformable(subgroup, source);
conformable(subgroup, Determinant);
int i0, i1;
su2SubGroupIndex(i0, i1, su2_index);
autoView( subgroup_v , subgroup,AcceleratorWrite);
autoView( source_v , source,AcceleratorRead);
autoView( Determinant_v , Determinant,AcceleratorWrite);
accelerator_for(ss, grid->oSites(), 1, {
subgroup_v[ss]()()(0, 0) = source_v[ss]()()(i0, i0);
subgroup_v[ss]()()(0, 1) = source_v[ss]()()(i0, i1);
subgroup_v[ss]()()(1, 0) = source_v[ss]()()(i1, i0);
subgroup_v[ss]()()(1, 1) = source_v[ss]()()(i1, i1);
iSU2Matrix<vcplx> Sigma = subgroup_v[ss];
Sigma = Sigma - adj(Sigma) + trace(adj(Sigma));
subgroup_v[ss] = Sigma;
// this should be purely real
Determinant_v[ss] =
Sigma()()(0, 0) * Sigma()()(1, 1) - Sigma()()(0, 1) * Sigma()()(1, 0);
});
}
//////////////////////////////////////////////////////////////////////////////////////////
// Set matrix to one and insert a pauli subgroup
//////////////////////////////////////////////////////////////////////////////////////////
template <class vcplx>
static void su2Insert(const Lattice<iSU2Matrix<vcplx> > &subgroup,
Lattice<iSUnMatrix<vcplx> > &dest, int su2_index) {
GridBase *grid(dest.Grid());
conformable(subgroup, dest);
int i0, i1;
su2SubGroupIndex(i0, i1, su2_index);
dest = 1.0; // start out with identity
autoView( dest_v , dest, AcceleratorWrite);
autoView( subgroup_v, subgroup, AcceleratorRead);
accelerator_for(ss, grid->oSites(),1,
{
dest_v[ss]()()(i0, i0) = subgroup_v[ss]()()(0, 0);
dest_v[ss]()()(i0, i1) = subgroup_v[ss]()()(0, 1);
dest_v[ss]()()(i1, i0) = subgroup_v[ss]()()(1, 0);
dest_v[ss]()()(i1, i1) = subgroup_v[ss]()()(1, 1);
});
}
///////////////////////////////////////////////
// Generate e^{ Re Tr Staple Link} dlink
//
// *** Note Staple should be an appropriate linear combination of all
// staples.
// *** If the staple is already multiplied by beta, pass coefficient 1.0.
// *** This routine applies the additional 1/Nc factor that comes after trace
// in action.
//
///////////////////////////////////////////////
static void SubGroupHeatBath(GridSerialRNG &sRNG, GridParallelRNG &pRNG,
RealD beta, // coeff multiplying staple in action (with no 1/Nc)
LatticeMatrix &link,
const LatticeMatrix &barestaple, // multiplied by action coeffs so th
int su2_subgroup, int nheatbath, LatticeInteger &wheremask)
{
GridBase *grid = link.Grid();
const RealD twopi = 2.0 * M_PI;
LatticeMatrix staple(grid);
staple = barestaple * (beta / ncolour);
LatticeMatrix V(grid);
V = link * staple;
// Subgroup manipulation in the lie algebra space
LatticeSU2Matrix u(grid); // Kennedy pendleton "u" real projected normalised Sigma
LatticeSU2Matrix uinv(grid);
LatticeSU2Matrix ua(grid); // a in pauli form
LatticeSU2Matrix b(grid); // rotated matrix after hb
// Some handy constant fields
LatticeComplex ones(grid);
ones = 1.0;
LatticeComplex zeros(grid);
zeros = Zero();
LatticeReal rones(grid);
rones = 1.0;
LatticeReal rzeros(grid);
rzeros = Zero();
LatticeComplex udet(grid); // determinant of real(staple)
LatticeInteger mask_true(grid);
mask_true = 1;
LatticeInteger mask_false(grid);
mask_false = 0;
/*
PLB 156 P393 (1985) (Kennedy and Pendleton)
Note: absorb "beta" into the def of sigma compared to KP paper; staple
passed to this routine has "beta" already multiplied in
Action linear in links h and of form:
beta S = beta Sum_p (1 - 1/Nc Re Tr Plaq )
Writing Sigma = 1/Nc (beta Sigma') where sum over staples is "Sigma' "
beta S = const - beta/Nc Re Tr h Sigma'
= const - Re Tr h Sigma
Decompose h and Sigma into (1, sigma_j) ; h_i real, h^2=1, Sigma_i complex
arbitrary.
Tr h Sigma = h_i Sigma_j Tr (sigma_i sigma_j) = h_i Sigma_j 2 delta_ij
Re Tr h Sigma = 2 h_j Re Sigma_j
Normalised re Sigma_j = xi u_j
With u_j a unit vector and U can be in SU(2);
Re Tr h Sigma = 2 h_j Re Sigma_j = 2 xi (h.u)
4xi^2 = Det [ Sig - Sig^dag + 1 Tr Sigdag]
u = 1/2xi [ Sig - Sig^dag + 1 Tr Sigdag]
xi = sqrt(Det)/2;
Write a= u h in SU(2); a has pauli decomp a_j;
Note: Product b' xi is invariant because scaling Sigma leaves
normalised vector "u" fixed; Can rescale Sigma so b' = 1.
*/
////////////////////////////////////////////////////////
// Real part of Pauli decomposition
// Note a subgroup can project to zero in cold start
////////////////////////////////////////////////////////
su2Extract(udet, u, V, su2_subgroup);
//////////////////////////////////////////////////////
// Normalising this vector if possible; else identity
//////////////////////////////////////////////////////
LatticeComplex xi(grid);
LatticeSU2Matrix lident(grid);
SU2Matrix ident = Complex(1.0);
SU2Matrix pauli1;
SU<2>::generator(0, pauli1);
SU2Matrix pauli2;
SU<2>::generator(1, pauli2);
SU2Matrix pauli3;
SU<2>::generator(2, pauli3);
pauli1 = timesI(pauli1) * 2.0;
pauli2 = timesI(pauli2) * 2.0;
pauli3 = timesI(pauli3) * 2.0;
LatticeComplex cone(grid);
LatticeReal adet(grid);
adet = abs(toReal(udet));
lident = Complex(1.0);
cone = Complex(1.0);
Real machine_epsilon = 1.0e-7;
u = where(adet > machine_epsilon, u, lident);
udet = where(adet > machine_epsilon, udet, cone);
xi = 0.5 * sqrt(udet); // 4xi^2 = Det [ Sig - Sig^dag + 1 Tr Sigdag]
u = 0.5 * u *
pow(xi, -1.0); // u = 1/2xi [ Sig - Sig^dag + 1 Tr Sigdag]
// Debug test for sanity
uinv = adj(u);
b = u * uinv - 1.0;
assert(norm2(b) < 1.0e-4);
/*
Measure: Haar measure dh has d^4a delta(1-|a^2|)
In polars:
da = da0 r^2 sin theta dr dtheta dphi delta( 1 - r^2 -a0^2)
= da0 r^2 sin theta dr dtheta dphi delta( (sqrt(1-a0^2) - r)(sqrt(1-a0^2) +
r) )
= da0 r/2 sin theta dr dtheta dphi delta( sqrt(1-a0^2) - r )
Action factor Q(h) dh = e^-S[h] dh = e^{ xi Tr uh} dh // beta enters
through xi
= e^{2 xi (h.u)} dh
= e^{2 xi h0u0}.e^{2 xi h1u1}.e^{2 xi
h2u2}.e^{2 xi h3u3} dh
Therefore for each site, take xi for that site
i) generate |a0|<1 with dist
(1-a0^2)^0.5 e^{2 xi a0 } da0
Take alpha = 2 xi [ recall 2 beta/Nc unmodified staple norm ]; hence the
2.0/Nc factor in Chroma.
A. Generate four uniformly distributed pseudo-random numbers R, R', R'',
R''' in the unit interval;
B. Set X = -(ln R)/alpha, X' = -(ln R')/alpha;
C. Set C = cos^2(2pi R''), with R'' another uniform random number in [0,1];
D. Set A = XC;
E. Let d = X' + A;
F. If R'''^2 > 1 - 0.5 d, go back to A;
G. Set a0 = 1 - d;
Note that in step D setting B ~ X - A and using B in place of A in step E will
generate a second independent a0 value.
*/
/////////////////////////////////////////////////////////
// count the number of sites by picking "1"'s out of hat
/////////////////////////////////////////////////////////
Integer hit = 0;
LatticeReal rtmp(grid);
rtmp = where(wheremask, rones, rzeros);
RealD numSites = sum(rtmp);
RealD numAccepted;
LatticeInteger Accepted(grid);
Accepted = Zero();
LatticeInteger newlyAccepted(grid);
std::vector<LatticeReal> xr(4, grid);
std::vector<LatticeReal> a(4, grid);
LatticeReal d(grid);
d = Zero();
LatticeReal alpha(grid);
// std::cout<<GridLogMessage<<"xi "<<xi <<std::endl;
xi = 2.0 *xi;
alpha = toReal(xi);
do {
// A. Generate four uniformly distributed pseudo-random numbers R, R',
// R'', R''' in the unit interval;
random(pRNG, xr[0]);
random(pRNG, xr[1]);
random(pRNG, xr[2]);
random(pRNG, xr[3]);
// B. Set X = - ln R/alpha, X' = -ln R'/alpha
xr[1] = -log(xr[1]) / alpha;
xr[2] = -log(xr[2]) / alpha;
// C. Set C = cos^2(2piR'')
xr[3] = cos(xr[3] * twopi);
xr[3] = xr[3] * xr[3];
LatticeReal xrsq(grid);
// D. Set A = XC;
// E. Let d = X'+A;
xrsq = xr[2] + xr[1] * xr[3];
d = where(Accepted, d, xr[2] + xr[1] * xr[3]);
// F. If R'''^2 > 1 - 0.5 d, go back to A;
LatticeReal thresh(grid);
thresh = 1.0 - d * 0.5;
xrsq = xr[0] * xr[0];
LatticeInteger ione(grid);
ione = 1;
LatticeInteger izero(grid);
izero = Zero();
newlyAccepted = where(xrsq < thresh, ione, izero);
Accepted = where(newlyAccepted, newlyAccepted, Accepted);
Accepted = where(wheremask, Accepted, izero);
// FIXME need an iSum for integer to avoid overload on return type??
rtmp = where(Accepted, rones, rzeros);
numAccepted = sum(rtmp);
hit++;
} while ((numAccepted < numSites) && (hit < nheatbath));
// G. Set a0 = 1 - d;
a[0] = Zero();
a[0] = where(wheremask, 1.0 - d, a[0]);
//////////////////////////////////////////
// ii) generate a_i uniform on two sphere radius (1-a0^2)^0.5
//////////////////////////////////////////
LatticeReal a123mag(grid);
a123mag = sqrt(abs(1.0 - a[0] * a[0]));
LatticeReal cos_theta(grid);
LatticeReal sin_theta(grid);
LatticeReal phi(grid);
random(pRNG, phi);
phi = phi * twopi; // uniform in [0,2pi]
random(pRNG, cos_theta);
cos_theta = (cos_theta * 2.0) - 1.0; // uniform in [-1,1]
sin_theta = sqrt(abs(1.0 - cos_theta * cos_theta));
a[1] = a123mag * sin_theta * cos(phi);
a[2] = a123mag * sin_theta * sin(phi);
a[3] = a123mag * cos_theta;
ua = toComplex(a[0]) * ident + toComplex(a[1]) * pauli1 +
toComplex(a[2]) * pauli2 + toComplex(a[3]) * pauli3;
b = 1.0;
b = where(wheremask, uinv * ua, b);
su2Insert(b, V, su2_subgroup);
// mask the assignment back based on acceptance
link = where(Accepted, V * link, link);
//////////////////////////////
// Debug Checks
// SU2 check
LatticeSU2Matrix check(grid); // rotated matrix after hb
u = Zero();
check = ua * adj(ua) - 1.0;
check = where(Accepted, check, u);
assert(norm2(check) < 1.0e-4);
check = b * adj(b) - 1.0;
check = where(Accepted, check, u);
assert(norm2(check) < 1.0e-4);
LatticeMatrix Vcheck(grid);
Vcheck = Zero();
Vcheck = where(Accepted, V * adj(V) - 1.0, Vcheck);
// std::cout<<GridLogMessage << "SU3 check " <<norm2(Vcheck)<<std::endl;
assert(norm2(Vcheck) < 1.0e-4);
// Verify the link stays in SU(N)
// std::cout<<GridLogMessage <<"Checking the modified link"<<std::endl;
Vcheck = link * adj(link) - 1.0;
assert(norm2(Vcheck) < 1.0e-4);
/////////////////////////////////
}
static void printGenerators(void) {
for (int gen = 0; gen < AdjointDimension; gen++) {
Matrix ta;
generator(gen, ta);
std::cout << GridLogMessage << "Nc = " << ncolour << " t_" << gen
<< std::endl;
std::cout << GridLogMessage << ta << std::endl;
}
}
static void testGenerators(void) {
Matrix ta;
Matrix tb;
std::cout << GridLogMessage
<< "Fundamental - Checking trace ta tb is 0.5 delta_ab"
<< std::endl;
for (int a = 0; a < AdjointDimension; a++) {
for (int b = 0; b < AdjointDimension; b++) {
generator(a, ta);
generator(b, tb);
Complex tr = TensorRemove(trace(ta * tb));
std::cout << GridLogMessage << "(" << a << "," << b << ") = " << tr
<< std::endl;
if (a == b) assert(abs(tr - Complex(0.5)) < 1.0e-6);
if (a != b) assert(abs(tr) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
}
std::cout << GridLogMessage << "Fundamental - Checking if hermitian"
<< std::endl;
for (int a = 0; a < AdjointDimension; a++) {
generator(a, ta);
std::cout << GridLogMessage << a << std::endl;
assert(norm2(ta - adj(ta)) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage << "Fundamental - Checking if traceless"
<< std::endl;
for (int a = 0; a < AdjointDimension; a++) {
generator(a, ta);
Complex tr = TensorRemove(trace(ta));
std::cout << GridLogMessage << a << " " << std::endl;
assert(abs(tr) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
}
// reunitarise??
template <typename LatticeMatrixType>
static void LieRandomize(GridParallelRNG &pRNG, LatticeMatrixType &out, double scale = 1.0)
{
GridBase *grid = out.Grid();
typedef typename LatticeMatrixType::vector_type vector_type;
typedef iSinglet<vector_type> vTComplexType;
typedef Lattice<vTComplexType> LatticeComplexType;
typedef typename GridTypeMapper<typename LatticeMatrixType::vector_object>::scalar_object MatrixType;
LatticeComplexType ca(grid);
LatticeMatrixType lie(grid);
LatticeMatrixType la(grid);
ComplexD ci(0.0, scale);
// ComplexD cone(1.0, 0.0);
MatrixType ta;
lie = Zero();
for (int a = 0; a < AdjointDimension; a++) {
random(pRNG, ca);
ca = (ca + conjugate(ca)) * 0.5;
ca = ca - 0.5;
generator(a, ta);
la = ci * ca * ta;
lie = lie + la; // e^{i la ta}
}
taExp(lie, out);
}
static void GaussianFundamentalLieAlgebraMatrix(GridParallelRNG &pRNG,
LatticeMatrix &out,
Real scale = 1.0) {
GridBase *grid = out.Grid();
LatticeReal ca(grid);
LatticeMatrix la(grid);
Complex ci(0.0, scale);
Matrix ta;
out = Zero();
for (int a = 0; a < AdjointDimension; a++) {
gaussian(pRNG, ca);
generator(a, ta);
la = toComplex(ca) * ta;
out += la;
}
out *= ci;
}
static void FundamentalLieAlgebraMatrix(const LatticeAlgebraVector &h,
LatticeMatrix &out,
Real scale = 1.0) {
conformable(h, out);
GridBase *grid = out.Grid();
LatticeMatrix la(grid);
Matrix ta;
out = Zero();
for (int a = 0; a < AdjointDimension; a++) {
generator(a, ta);
la = peekColour(h, a) * timesI(ta) * scale;
out += la;
}
}
/*
* Fundamental rep gauge xform
*/
template<typename Fundamental,typename GaugeMat>
static void GaugeTransformFundamental( Fundamental &ferm, GaugeMat &g){
GridBase *grid = ferm._grid;
conformable(grid,g._grid);
ferm = g*ferm;
}
/*
* Adjoint rep gauge xform
*/
template<typename Gimpl>
static void GaugeTransform(typename Gimpl::GaugeField &Umu, typename Gimpl::GaugeLinkField &g){
GridBase *grid = Umu.Grid();
conformable(grid,g.Grid());
typename Gimpl::GaugeLinkField U(grid);
typename Gimpl::GaugeLinkField ag(grid); ag = adj(g);
for(int mu=0;mu<Nd;mu++){
U= PeekIndex<LorentzIndex>(Umu,mu);
U = g*U*Gimpl::CshiftLink(ag, mu, 1); //BC-aware
PokeIndex<LorentzIndex>(Umu,U,mu);
}
}
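// Added remark: in index notation this is the standard link transformation
//   U_mu(x) -> g(x) U_mu(x) g^dag(x + mu),
// with Gimpl::CshiftLink supplying the boundary-condition-aware shift of
// adj(g) by one site in the mu direction.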
template<typename Gimpl>
static void GaugeTransform( std::vector<typename Gimpl::GaugeLinkField> &U, typename Gimpl::GaugeLinkField &g){
GridBase *grid = g.Grid();
typename Gimpl::GaugeLinkField ag(grid); ag = adj(g);
for(int mu=0;mu<Nd;mu++){
U[mu] = g*U[mu]*Gimpl::CshiftLink(ag, mu, 1); //BC-aware
}
}
template<typename Gimpl>
static void RandomGaugeTransform(GridParallelRNG &pRNG, typename Gimpl::GaugeField &Umu, typename Gimpl::GaugeLinkField &g){
LieRandomize(pRNG,g,1.0);
GaugeTransform<Gimpl>(Umu,g);
}
// Projects the algebra components of a lattice matrix (of dimension ncol*ncol - 1)
// inverse operation: FundamentalLieAlgebraMatrix
static void projectOnAlgebra(LatticeAlgebraVector &h_out, const LatticeMatrix &in, Real scale = 1.0) {
conformable(h_out, in);
h_out = Zero();
Matrix Ta;
for (int a = 0; a < AdjointDimension; a++) {
generator(a, Ta);
pokeColour(h_out, - 2.0 * (trace(timesI(Ta) * in)) * scale, a);
}
}
template <typename GaugeField>
static void HotConfiguration(GridParallelRNG &pRNG, GaugeField &out) {
typedef typename GaugeField::vector_type vector_type;
typedef iSUnMatrix<vector_type> vMatrixType;
typedef Lattice<vMatrixType> LatticeMatrixType;
LatticeMatrixType Umu(out.Grid());
for (int mu = 0; mu < Nd; mu++) {
LieRandomize(pRNG, Umu, 1.0);
PokeIndex<LorentzIndex>(out, Umu, mu);
}
}
template<typename GaugeField>
static void TepidConfiguration(GridParallelRNG &pRNG,GaugeField &out){
typedef typename GaugeField::vector_type vector_type;
typedef iSUnMatrix<vector_type> vMatrixType;
typedef Lattice<vMatrixType> LatticeMatrixType;
LatticeMatrixType Umu(out.Grid());
for(int mu=0;mu<Nd;mu++){
LieRandomize(pRNG,Umu,0.01);
PokeIndex<LorentzIndex>(out,Umu,mu);
}
}
template<typename GaugeField>
static void ColdConfiguration(GaugeField &out){
typedef typename GaugeField::vector_type vector_type;
typedef iSUnMatrix<vector_type> vMatrixType;
typedef Lattice<vMatrixType> LatticeMatrixType;
LatticeMatrixType Umu(out.Grid());
Umu=1.0;
for(int mu=0;mu<Nd;mu++){
PokeIndex<LorentzIndex>(out,Umu,mu);
}
}
template<typename GaugeField>
static void ColdConfiguration(GridParallelRNG &pRNG,GaugeField &out){
ColdConfiguration(out);
}
template<typename LatticeMatrixType>
static void taProj( const LatticeMatrixType &in, LatticeMatrixType &out){
out = Ta(in);
}
template <typename LatticeMatrixType>
static void taExp(const LatticeMatrixType &x, LatticeMatrixType &ex) {
typedef typename LatticeMatrixType::scalar_type ComplexType;
LatticeMatrixType xn(x.Grid());
RealD nfac = 1.0;
xn = x;
ex = xn + ComplexType(1.0); // 1+x
// Do a 12th order exponentiation
for (int i = 2; i <= 12; ++i) {
nfac = nfac / RealD(i); // 1/2, 1/2.3 ...
xn = xn * x; // x2, x3,x4....
ex = ex + xn * nfac; // x2/2!, x3/3!....
}
}
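// Added remark: the leading truncation error of the 12th-order series above
// is ||x||^13/13! ~ 1.6e-10 for ||x|| of order one, well below the 1e-4
// unitarity tolerances asserted elsewhere in this file.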
};
template<int N>
LatticeComplexD Determinant(const Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu)
{
GridBase *grid=Umu.Grid();
auto lvol = grid->lSites();
LatticeComplexD ret(grid);
autoView(Umu_v,Umu,CpuRead);
autoView(ret_v,ret,CpuWrite);
thread_for(site,lvol,{
Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N);
Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor);
iScalar<iScalar<iMatrix<ComplexD, N> > > Us;
peekLocalSite(Us, Umu_v, lcoor);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
EigenU(i,j) = Us()()(i,j);
}}
ComplexD det = EigenU.determinant();
pokeLocalSite(det,ret_v,lcoor);
});
return ret;
}
template<int N>
static void ProjectSUn(Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu)
{
Umu = ProjectOnGroup(Umu);
auto det = Determinant(Umu);
det = conjugate(det);
for(int i=0;i<N;i++){
auto element = PeekIndex<ColourIndex>(Umu,N-1,i);
element = element * det;
PokeIndex<ColourIndex>(Umu,element,N-1,i);
}
}
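// Added remark: ProjectOnGroup leaves the matrix unitary with |det U| = 1,
// so rescaling a single row by conjugate(det U) gives
//   det U' = det U * conjugate(det U) = |det U|^2 = 1
// while preserving the orthonormality of the rows, i.e. U' is in SU(N).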
template<int N>
static void ProjectSUn(Lattice<iVector<iScalar<iMatrix<vComplexD, N> >,Nd> > &U)
{
GridBase *grid=U.Grid();
// Reunitarise
for(int mu=0;mu<Nd;mu++){
auto Umu = PeekIndex<LorentzIndex>(U,mu);
Umu = ProjectOnGroup(Umu);
ProjectSUn(Umu);
PokeIndex<LorentzIndex>(U,Umu,mu);
}
}
// Explicit specialisation for SU(3).
static void
ProjectSU3 (Lattice<iScalar<iScalar<iMatrix<vComplexD, 3> > > > &Umu)
{
GridBase *grid=Umu.Grid();
const int x=0;
const int y=1;
const int z=2;
// Reunitarise
Umu = ProjectOnGroup(Umu);
autoView(Umu_v,Umu,CpuWrite);
thread_for(ss,grid->oSites(),{
auto cm = Umu_v[ss];
cm()()(2,x) = adj(cm()()(0,y)*cm()()(1,z)-cm()()(0,z)*cm()()(1,y)); //x= yz-zy
cm()()(2,y) = adj(cm()()(0,z)*cm()()(1,x)-cm()()(0,x)*cm()()(1,z)); //y= zx-xz
cm()()(2,z) = adj(cm()()(0,x)*cm()()(1,y)-cm()()(0,y)*cm()()(1,x)); //z= xy-yx
Umu_v[ss]=cm;
});
}
static void ProjectSU3(Lattice<iVector<iScalar<iMatrix<vComplexD, 3> >,Nd> > &U)
{
GridBase *grid=U.Grid();
// Reunitarise
for(int mu=0;mu<Nd;mu++){
auto Umu = PeekIndex<LorentzIndex>(U,mu);
Umu = ProjectOnGroup(Umu);
ProjectSU3(Umu);
PokeIndex<LorentzIndex>(U,Umu,mu);
}
}
typedef SU<2> SU2;
typedef SU<3> SU3;
typedef SU<4> SU4;
typedef SU<5> SU5;
typedef SU<Nc> FundamentalMatrices;
NAMESPACE_END(Grid);
#endif

Grid/qcd/utils/SUn.impl.h Normal file

@ -0,0 +1,578 @@
// This file is #included into the body of the class template definition of
// GaugeGroup. So, imagine there to be
//
// template <int ncolour, class group_name>
// class GaugeGroup {
//
// around it.
//
// Please note that the unconventional file extension makes sure that it
// doesn't get found by the scripts/filelist during bootstrapping.
private:
template <ONLY_IF_SU>
static int su2subgroups(GroupName::SU) { return (ncolour * (ncolour - 1)) / 2; }
////////////////////////////////////////////////////////////////////////
// There are N^2-1 generators for SU(N).
//
// We take a traceless hermitian generator basis as follows
//
// * Normalisation: trace ta tb = 1/2 delta_ab = T_F delta_ab
// T_F = 1/2 for SU(N) groups
//
// * Off diagonal
// - pairs of rows i1,i2 behaving like pauli matrices sigma_x, sigma_y
//
// - there are (Nc-1-i1) slots for i2 on each row [ x 0 x ]
// direct count off each row
//
// - Sum of all pairs is Nc(Nc-1)/2: proof arithmetic series
//
// (Nc-1) + (Nc-2)+... 1 ==> Nc*(Nc-1)/2
// 1+ 2+ + + Nc-1
//
// - There are 2 x Nc (Nc-1)/ 2 of these = Nc^2 - Nc
//
// - We enumerate the row-col pairs.
// - for each row col pair there is a (sigma_x) and a (sigma_y) like
// generator
//
//
// t^a_ij = { a in 0 .. Nc(Nc-1)/2 - 1 } => 1/2 (delta_{i,i1} delta_{j,i2} +
// delta_{i,i2} delta_{j,i1})
// t^a_ij = { a in Nc(Nc-1)/2 ... Nc(Nc-1) - 1 } => i/2 (delta_{i,i1}
// delta_{j,i2} - delta_{i,i2} delta_{j,i1})
//
// * Diagonal; must be traceless and normalised
// - Sequence is
// N (1,-1,0,0...)
// N (1, 1,-2,0...)
// N (1, 1, 1,-3,0...)
// N (1, 1, 1, 1,-4,0...)
//
// where the normalisation 1/2 = N^2 (1 + ... + 1 + m^2) = N^2 m(m+1) fixes
// N = 1/sqrt(2 m(m+1)) for the m-th diagonal generator
// NB this gives the famous SU(3) result for lambda_8
//
// N= sqrt(1/2 . 1/6 ) = 1/2 . 1/sqrt(3)
//
// ( 1 )
// ( 1 ) / sqrt(3) /2 = 1/2 lambda_8
// ( -2)
//
////////////////////////////////////////////////////////////////////////
template <class cplx, ONLY_IF_SU>
static void generator(int lieIndex, iGroupMatrix<cplx> &ta, GroupName::SU) {
// map lie index to which type of generator
int diagIndex;
int su2Index;
int sigxy;
int NNm1 = ncolour * (ncolour - 1);
if (lieIndex >= NNm1) {
diagIndex = lieIndex - NNm1;
generatorDiagonal(diagIndex, ta);
return;
}
sigxy = lieIndex & 0x1; // even or odd
su2Index = lieIndex >> 1;
if (sigxy)
generatorSigmaY(su2Index, ta);
else
generatorSigmaX(su2Index, ta);
}
template <class cplx, ONLY_IF_SU>
static void generatorSigmaY(int su2Index, iGroupMatrix<cplx> &ta) {
ta = Zero();
int i1, i2;
su2SubGroupIndex(i1, i2, su2Index);
ta()()(i1, i2) = 1.0;
ta()()(i2, i1) = 1.0;
ta = ta * 0.5;
}
template <class cplx, ONLY_IF_SU>
static void generatorSigmaX(int su2Index, iGroupMatrix<cplx> &ta) {
ta = Zero();
cplx i(0.0, 1.0);
int i1, i2;
su2SubGroupIndex(i1, i2, su2Index);
ta()()(i1, i2) = i;
ta()()(i2, i1) = -i;
ta = ta * 0.5;
}
template <class cplx, ONLY_IF_SU>
static void generatorDiagonal(int diagIndex, iGroupMatrix<cplx> &ta) {
// diag ({1, 1, ..., 1}(k-times), -k, 0, 0, ...)
ta = Zero();
int k = diagIndex + 1; // diagIndex starts from 0
for (int i = 0; i <= diagIndex; i++) { // k iterations
ta()()(i, i) = 1.0;
}
ta()()(k, k) = -k; // indexing starts from 0
RealD nrm = 1.0 / std::sqrt(2.0 * k * (k + 1));
ta = ta * nrm;
}
////////////////////////////////////////////////////////////////////////
// Map a su2 subgroup number to the pair of rows that are non zero
////////////////////////////////////////////////////////////////////////
static void su2SubGroupIndex(int &i1, int &i2, int su2_index, GroupName::SU) {
assert((su2_index >= 0) && (su2_index < (ncolour * (ncolour - 1)) / 2));
int spare = su2_index;
for (i1 = 0; spare >= (ncolour - 1 - i1); i1++) {
spare = spare - (ncolour - 1 - i1); // remove the Nc-1-i1 terms
}
i2 = i1 + 1 + spare;
}
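// Worked example (added): for ncolour = 3 the su2_index -> (i1,i2) map is
//   0 -> (0,1), 1 -> (0,2), 2 -> (1,2),
// i.e. pairs are enumerated row by row, each row i1 contributing the
// Nc-1-i1 slots that the loop above subtracts off.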
public:
//////////////////////////////////////////////////////////////////////////////////////////
// Pull out a subgroup and project on to real coeffs x pauli basis
//////////////////////////////////////////////////////////////////////////////////////////
template <class vcplx, ONLY_IF_SU>
static void su2Extract(Lattice<iSinglet<vcplx> > &Determinant,
Lattice<iSU2Matrix<vcplx> > &subgroup,
const Lattice<iGroupMatrix<vcplx> > &source,
int su2_index) {
GridBase *grid(source.Grid());
conformable(subgroup, source);
conformable(subgroup, Determinant);
int i0, i1;
su2SubGroupIndex(i0, i1, su2_index);
autoView(subgroup_v, subgroup, AcceleratorWrite);
autoView(source_v, source, AcceleratorRead);
autoView(Determinant_v, Determinant, AcceleratorWrite);
accelerator_for(ss, grid->oSites(), 1, {
subgroup_v[ss]()()(0, 0) = source_v[ss]()()(i0, i0);
subgroup_v[ss]()()(0, 1) = source_v[ss]()()(i0, i1);
subgroup_v[ss]()()(1, 0) = source_v[ss]()()(i1, i0);
subgroup_v[ss]()()(1, 1) = source_v[ss]()()(i1, i1);
iSU2Matrix<vcplx> Sigma = subgroup_v[ss];
Sigma = Sigma - adj(Sigma) + trace(adj(Sigma));
subgroup_v[ss] = Sigma;
// this should be purely real
Determinant_v[ss] =
Sigma()()(0, 0) * Sigma()()(1, 1) - Sigma()()(0, 1) * Sigma()()(1, 0);
});
}
//////////////////////////////////////////////////////////////////////////////////////////
// Set matrix to one and insert a pauli subgroup
//////////////////////////////////////////////////////////////////////////////////////////
template <class vcplx, ONLY_IF_SU>
static void su2Insert(const Lattice<iSU2Matrix<vcplx> > &subgroup,
Lattice<iGroupMatrix<vcplx> > &dest, int su2_index) {
GridBase *grid(dest.Grid());
conformable(subgroup, dest);
int i0, i1;
su2SubGroupIndex(i0, i1, su2_index);
dest = 1.0; // start out with identity
autoView(dest_v, dest, AcceleratorWrite);
autoView(subgroup_v, subgroup, AcceleratorRead);
accelerator_for(ss, grid->oSites(), 1, {
dest_v[ss]()()(i0, i0) = subgroup_v[ss]()()(0, 0);
dest_v[ss]()()(i0, i1) = subgroup_v[ss]()()(0, 1);
dest_v[ss]()()(i1, i0) = subgroup_v[ss]()()(1, 0);
dest_v[ss]()()(i1, i1) = subgroup_v[ss]()()(1, 1);
});
}
///////////////////////////////////////////////
// Generate e^{ Re Tr Staple Link} dlink
//
// *** Note Staple should be an appropriate linear combination of all
// staples.
// *** If the staple is already multiplied by beta, pass coefficient 1.0.
// *** This routine applies the additional 1/Nc factor that comes after trace
// in action.
//
///////////////////////////////////////////////
template <ONLY_IF_SU>
static void SubGroupHeatBath(
GridSerialRNG &sRNG, GridParallelRNG &pRNG,
RealD beta, // coeff multiplying staple in action (with no 1/Nc)
LatticeMatrix &link,
const LatticeMatrix &barestaple, // multiplied by action coeffs so th
int su2_subgroup, int nheatbath, LatticeInteger &wheremask) {
GridBase *grid = link.Grid();
const RealD twopi = 2.0 * M_PI;
LatticeMatrix staple(grid);
staple = barestaple * (beta / ncolour);
LatticeMatrix V(grid);
V = link * staple;
// Subgroup manipulation in the lie algebra space
LatticeSU2Matrix u(
grid); // Kennedy pendleton "u" real projected normalised Sigma
LatticeSU2Matrix uinv(grid);
LatticeSU2Matrix ua(grid); // a in pauli form
LatticeSU2Matrix b(grid); // rotated matrix after hb
// Some handy constant fields
LatticeComplex ones(grid);
ones = 1.0;
LatticeComplex zeros(grid);
zeros = Zero();
LatticeReal rones(grid);
rones = 1.0;
LatticeReal rzeros(grid);
rzeros = Zero();
LatticeComplex udet(grid); // determinant of real(staple)
LatticeInteger mask_true(grid);
mask_true = 1;
LatticeInteger mask_false(grid);
mask_false = 0;
/*
PLB 156 P393 (1985) (Kennedy and Pendleton)
Note: absorb "beta" into the def of sigma compared to KP paper; staple
passed to this routine has "beta" already multiplied in
Action linear in links h and of form:
beta S = beta Sum_p (1 - 1/Nc Re Tr Plaq )
Writing Sigma = 1/Nc (beta Sigma') where sum over staples is "Sigma' "
beta S = const - beta/Nc Re Tr h Sigma'
= const - Re Tr h Sigma
Decompose h and Sigma into (1, sigma_j) ; h_i real, h^2=1, Sigma_i complex
arbitrary.
Tr h Sigma = h_i Sigma_j Tr (sigma_i sigma_j) = h_i Sigma_j 2 delta_ij
Re Tr h Sigma = 2 h_j Re Sigma_j
Normalised re Sigma_j = xi u_j
With u_j a unit vector and U can be in SU(2);
Re Tr h Sigma = 2 h_j Re Sigma_j = 2 xi (h.u)
4xi^2 = Det [ Sig - Sig^dag + 1 Tr Sigdag]
u = 1/2xi [ Sig - Sig^dag + 1 Tr Sigdag]
xi = sqrt(Det)/2;
Write a= u h in SU(2); a has pauli decomp a_j;
Note: Product b' xi is invariant because scaling Sigma leaves
normalised vector "u" fixed; Can rescale Sigma so b' = 1.
*/
////////////////////////////////////////////////////////
// Real part of Pauli decomposition
// Note a subgroup can project to zero in cold start
////////////////////////////////////////////////////////
su2Extract(udet, u, V, su2_subgroup);
//////////////////////////////////////////////////////
// Normalising this vector if possible; else identity
//////////////////////////////////////////////////////
LatticeComplex xi(grid);
LatticeSU2Matrix lident(grid);
SU2Matrix ident = Complex(1.0);
SU2Matrix pauli1;
GaugeGroup<2, GroupName::SU>::generator(0, pauli1);
SU2Matrix pauli2;
GaugeGroup<2, GroupName::SU>::generator(1, pauli2);
SU2Matrix pauli3;
GaugeGroup<2, GroupName::SU>::generator(2, pauli3);
pauli1 = timesI(pauli1) * 2.0;
pauli2 = timesI(pauli2) * 2.0;
pauli3 = timesI(pauli3) * 2.0;
LatticeComplex cone(grid);
LatticeReal adet(grid);
adet = abs(toReal(udet));
lident = Complex(1.0);
cone = Complex(1.0);
Real machine_epsilon = 1.0e-7;
u = where(adet > machine_epsilon, u, lident);
udet = where(adet > machine_epsilon, udet, cone);
xi = 0.5 * sqrt(udet); // 4xi^2 = Det [ Sig - Sig^dag + 1 Tr Sigdag]
u = 0.5 * u * pow(xi, -1.0); // u = 1/2xi [ Sig - Sig^dag + 1 Tr Sigdag]
// Debug test for sanity
uinv = adj(u);
b = u * uinv - 1.0;
assert(norm2(b) < 1.0e-4);
/*
Measure: Haar measure dh has d^4a delta(1-|a^2|)
In polars:
da = da0 r^2 sin theta dr dtheta dphi delta( 1 - r^2 -a0^2)
= da0 r^2 sin theta dr dtheta dphi delta( (sqrt(1-a0^2) - r)(sqrt(1-a0^2) +
r) )
= da0 r/2 sin theta dr dtheta dphi delta( sqrt(1-a0^2) - r )
Action factor Q(h) dh = e^-S[h] dh = e^{ xi Tr uh} dh // beta enters
through xi
= e^{2 xi (h.u)} dh
= e^{2 xi h0u0}.e^{2 xi h1u1}.e^{2 xi h2u2}.e^{2 xi h3u3} dh
Therefore for each site, take xi for that site
i) generate |a0|<1 with dist
(1-a0^2)^0.5 e^{2 xi a0 } da0
Take alpha = 2 xi [ recall 2 beta/Nc unmodified staple norm ]; hence the
2.0/Nc factor in Chroma.
A. Generate four uniformly distributed pseudo-random numbers R, R', R'',
R''' in the unit interval;
B. Set X = -(ln R)/alpha, X' = -(ln R')/alpha;
C. Set C = cos^2(2pi R''), with R'' another uniform random number in [0,1];
D. Set A = XC;
E. Let d = X' + A;
F. If R'''^2 > 1 - 0.5 d, go back to A;
G. Set a0 = 1 - d;
Note that in step D setting B ~ X - A and using B in place of A in step E
will generate a second independent a0 value.
*/
/////////////////////////////////////////////////////////
// count the number of sites by picking "1"'s out of hat
/////////////////////////////////////////////////////////
Integer hit = 0;
LatticeReal rtmp(grid);
rtmp = where(wheremask, rones, rzeros);
RealD numSites = sum(rtmp);
RealD numAccepted;
LatticeInteger Accepted(grid);
Accepted = Zero();
LatticeInteger newlyAccepted(grid);
std::vector<LatticeReal> xr(4, grid);
std::vector<LatticeReal> a(4, grid);
LatticeReal d(grid);
d = Zero();
LatticeReal alpha(grid);
// std::cout<<GridLogMessage<<"xi "<<xi <<std::endl;
xi = 2.0 * xi;
alpha = toReal(xi);
do {
// A. Generate four uniformly distributed pseudo-random numbers R, R',
// R'', R''' in the unit interval;
random(pRNG, xr[0]);
random(pRNG, xr[1]);
random(pRNG, xr[2]);
random(pRNG, xr[3]);
// B. Set X = - ln R/alpha, X' = -ln R'/alpha
xr[1] = -log(xr[1]) / alpha;
xr[2] = -log(xr[2]) / alpha;
// C. Set C = cos^2(2piR'')
xr[3] = cos(xr[3] * twopi);
xr[3] = xr[3] * xr[3];
LatticeReal xrsq(grid);
// D. Set A = XC;
// E. Let d = X'+A;
xrsq = xr[2] + xr[1] * xr[3];
d = where(Accepted, d, xr[2] + xr[1] * xr[3]);
// F. If R'''^2 > 1 - 0.5 d, go back to A;
LatticeReal thresh(grid);
thresh = 1.0 - d * 0.5;
xrsq = xr[0] * xr[0];
LatticeInteger ione(grid);
ione = 1;
LatticeInteger izero(grid);
izero = Zero();
newlyAccepted = where(xrsq < thresh, ione, izero);
Accepted = where(newlyAccepted, newlyAccepted, Accepted);
Accepted = where(wheremask, Accepted, izero);
// FIXME need an iSum for integer to avoid overload on return type??
rtmp = where(Accepted, rones, rzeros);
numAccepted = sum(rtmp);
hit++;
} while ((numAccepted < numSites) && (hit < nheatbath));
// G. Set a0 = 1 - d;
a[0] = Zero();
a[0] = where(wheremask, 1.0 - d, a[0]);
//////////////////////////////////////////
// ii) generate a_i uniform on two sphere radius (1-a0^2)^0.5
//////////////////////////////////////////
LatticeReal a123mag(grid);
a123mag = sqrt(abs(1.0 - a[0] * a[0]));
LatticeReal cos_theta(grid);
LatticeReal sin_theta(grid);
LatticeReal phi(grid);
random(pRNG, phi);
phi = phi * twopi; // uniform in [0,2pi]
random(pRNG, cos_theta);
cos_theta = (cos_theta * 2.0) - 1.0; // uniform in [-1,1]
sin_theta = sqrt(abs(1.0 - cos_theta * cos_theta));
a[1] = a123mag * sin_theta * cos(phi);
a[2] = a123mag * sin_theta * sin(phi);
a[3] = a123mag * cos_theta;
ua = toComplex(a[0]) * ident + toComplex(a[1]) * pauli1 +
toComplex(a[2]) * pauli2 + toComplex(a[3]) * pauli3;
b = 1.0;
b = where(wheremask, uinv * ua, b);
su2Insert(b, V, su2_subgroup);
// mask the assignment back based on acceptance
link = where(Accepted, V * link, link);
//////////////////////////////
// Debug Checks
// SU2 check
LatticeSU2Matrix check(grid); // rotated matrix after hb
u = Zero();
check = ua * adj(ua) - 1.0;
check = where(Accepted, check, u);
assert(norm2(check) < 1.0e-4);
check = b * adj(b) - 1.0;
check = where(Accepted, check, u);
assert(norm2(check) < 1.0e-4);
LatticeMatrix Vcheck(grid);
Vcheck = Zero();
Vcheck = where(Accepted, V * adj(V) - 1.0, Vcheck);
// std::cout<<GridLogMessage << "SU3 check " <<norm2(Vcheck)<<std::endl;
assert(norm2(Vcheck) < 1.0e-4);
// Verify the link stays in SU(N)
// std::cout<<GridLogMessage <<"Checking the modified link"<<std::endl;
Vcheck = link * adj(link) - 1.0;
assert(norm2(Vcheck) < 1.0e-4);
/////////////////////////////////
}
template <ONLY_IF_SU>
static void testGenerators(GroupName::SU) {
Matrix ta;
Matrix tb;
std::cout << GridLogMessage
<< "Fundamental - Checking trace ta tb is 0.5 delta_ab"
<< std::endl;
for (int a = 0; a < AdjointDimension; a++) {
for (int b = 0; b < AdjointDimension; b++) {
generator(a, ta);
generator(b, tb);
Complex tr = TensorRemove(trace(ta * tb));
std::cout << GridLogMessage << "(" << a << "," << b << ") = " << tr
<< std::endl;
if (a == b) assert(abs(tr - Complex(0.5)) < 1.0e-6);
if (a != b) assert(abs(tr) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
}
std::cout << GridLogMessage << "Fundamental - Checking if hermitian"
<< std::endl;
for (int a = 0; a < AdjointDimension; a++) {
generator(a, ta);
std::cout << GridLogMessage << a << std::endl;
assert(norm2(ta - adj(ta)) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage << "Fundamental - Checking if traceless"
<< std::endl;
for (int a = 0; a < AdjointDimension; a++) {
generator(a, ta);
Complex tr = TensorRemove(trace(ta));
std::cout << GridLogMessage << a << " " << std::endl;
assert(abs(tr) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
}
template <int N, class vtype>
static Lattice<iScalar<iScalar<iMatrix<vtype, N> > > >
ProjectOnGeneralGroup(const Lattice<iScalar<iScalar<iMatrix<vtype, N> > > > &Umu, GroupName::SU) {
return ProjectOnGroup(Umu);
}
template <class vtype>
accelerator_inline static iScalar<vtype> ProjectOnGeneralGroup(const iScalar<vtype> &r, GroupName::SU) {
return ProjectOnGroup(r);
}
template <class vtype, int N>
accelerator_inline static iVector<vtype,N> ProjectOnGeneralGroup(const iVector<vtype,N> &r, GroupName::SU) {
return ProjectOnGroup(r);
}
template <class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
accelerator_inline static iMatrix<vtype,N> ProjectOnGeneralGroup(const iMatrix<vtype,N> &arg, GroupName::SU) {
return ProjectOnGroup(arg);
}
template <typename LatticeMatrixType>
static void taProj(const LatticeMatrixType &in, LatticeMatrixType &out, GroupName::SU) {
out = Ta(in);
}
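// Added remark (stated as an assumption about Grid's tensor utilities, not
// verified against this diff): Ta(x) projects onto the traceless
// anti-hermitian part, schematically
//   Ta(x) = (x - x^dag)/2 - Tr[(x - x^dag)/2]/N * 1,
// i.e. onto the su(N) Lie algebra.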
/*
* Fundamental rep gauge xform
*/
template<typename Fundamental,typename GaugeMat>
static void GaugeTransformFundamental( Fundamental &ferm, GaugeMat &g){
GridBase *grid = ferm._grid;
conformable(grid,g._grid);
ferm = g*ferm;
}
/*
* Adjoint rep gauge xform
*/
template<typename Gimpl>
static void GaugeTransform(typename Gimpl::GaugeField &Umu, typename Gimpl::GaugeLinkField &g){
GridBase *grid = Umu.Grid();
conformable(grid,g.Grid());
typename Gimpl::GaugeLinkField U(grid);
typename Gimpl::GaugeLinkField ag(grid); ag = adj(g);
for(int mu=0;mu<Nd;mu++){
U= PeekIndex<LorentzIndex>(Umu,mu);
U = g*U*Gimpl::CshiftLink(ag, mu, 1); //BC-aware
PokeIndex<LorentzIndex>(Umu,U,mu);
}
}
template<typename Gimpl>
static void GaugeTransform( std::vector<typename Gimpl::GaugeLinkField> &U, typename Gimpl::GaugeLinkField &g){
GridBase *grid = g.Grid();
typename Gimpl::GaugeLinkField ag(grid); ag = adj(g);
for(int mu=0;mu<Nd;mu++){
U[mu] = g*U[mu]*Gimpl::CshiftLink(ag, mu, 1); //BC-aware
}
}
template<typename Gimpl>
static void RandomGaugeTransform(GridParallelRNG &pRNG, typename Gimpl::GaugeField &Umu, typename Gimpl::GaugeLinkField &g){
LieRandomize(pRNG,g,1.0);
GaugeTransform<Gimpl>(Umu,g);
}


@ -52,13 +52,18 @@ public:
typedef Lattice<iVector<iScalar<iMatrix<vComplexD, Dimension> >, Nd> > LatticeAdjFieldD;
template <typename vtype>
using iSUnMatrix = iScalar<iScalar<iMatrix<vtype, ncolour> > >;
typedef Lattice<iScalar<iScalar<iVector<vComplex, Dimension> > > > LatticeAdjVector;
template <class cplx>
static void generator(int Index, iSUnAdjointMatrix<cplx> &iAdjTa) {
// returns i(T_Adj)^index necessary for the projectors
// see definitions above
iAdjTa = Zero();
Vector<typename SU<ncolour>::template iSUnMatrix<cplx> > ta(ncolour * ncolour - 1);
typename SU<ncolour>::template iSUnMatrix<cplx> tmp;
Vector<iSUnMatrix<cplx> > ta(ncolour * ncolour - 1);
iSUnMatrix<cplx> tmp;
// FIXME not very efficient to get all the generators every time
for (int a = 0; a < Dimension; a++) SU<ncolour>::generator(a, ta[a]);
@ -66,8 +71,7 @@ public:
for (int a = 0; a < Dimension; a++) {
tmp = ta[a] * ta[Index] - ta[Index] * ta[a];
for (int b = 0; b < (ncolour * ncolour - 1); b++) {
typename SU<ncolour>::template iSUnMatrix<cplx> tmp1 =
2.0 * tmp * ta[b]; // 2.0 from the normalization
iSUnMatrix<cplx> tmp1 = 2.0 * tmp * ta[b]; // 2.0 from the normalization
Complex iTr = TensorRemove(timesI(trace(tmp1)));
//iAdjTa()()(b, a) = iTr;
iAdjTa()()(a, b) = iTr;
@ -133,8 +137,7 @@ public:
for (int a = 0; a < Dimension; a++) {
generator(a, iTa);
LatticeComplex tmp = real(trace(iTa * in)) * coefficient;
pokeColour(h_out, tmp, a);
pokeColour(h_out, real(trace(iTa * in)) * coefficient, a);
}
}

Some files were not shown because too many files have changed in this diff.