1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-19 00:07:05 +01:00

Compare commits

..

588 Commits

Author SHA1 Message Date
aa13118127 Missing conjugate already fixed in develop 2020-04-10 11:11:24 -04:00
6cdb09c884 Faster copy region 2020-04-10 11:10:52 -04:00
a65bc64f10 Accelerator peek poke 2020-04-10 11:09:59 -04:00
11dec4883c Don't throw assert 2020-04-10 11:09:11 -04:00
afa458c812 Extra solvers 2020-04-10 11:08:19 -04:00
dc50190b8f Faster GPU basis rotation
May need to later include Regensburg optimised CPU variant
2020-04-10 11:06:04 -04:00
8a5c13d5fb Still fast moving in changes 2020-02-06 17:57:26 -05:00
bdccb0c91f Working 2 types of decomposition 2020-02-06 17:26:55 -05:00
68b45f6444 Lower left/upper right region cut paste 2020-02-06 15:50:26 -05:00
ef9b3e658a extra typedef 2020-02-06 15:47:14 -05:00
b9ca40cc44 More precise power method at start 2020-02-06 10:09:14 -05:00
2f421a5db1 Comment fix 2020-02-06 10:08:27 -05:00
852fc1b001 True Hierarchical multigrid for DWF 2020-01-27 13:45:10 -05:00
2b5de5bba5 MdagM operator without norm option 2020-01-27 13:44:30 -05:00
2e85cae74e Add Jacobi polynomials 2020-01-27 13:43:49 -05:00
76c823781e Much faster coarsening 2020-01-27 13:43:19 -05:00
114db3b99d Optional MdagM without norms 2020-01-27 13:42:51 -05:00
49e123dbda Use explicit linalg calls to get coalesce optimisations on GPU 2020-01-27 12:44:51 -05:00
8cec294ec9 Make CG a bit less verbose as getting annoying in nested algorithms.
Can use Iterative logging if you want to see more
2020-01-27 12:44:04 -05:00
eb5b720e94 Normal Equations can be used in HDCR now 2020-01-27 12:43:29 -05:00
b2736ec80b Make PrecGCR recursive - it can precondition itself 2020-01-27 12:42:48 -05:00
086256a032 Less sloppy convergence test on PowerMethod 2020-01-27 12:41:59 -05:00
afc7426f39 Much bigger pointer cache in case of Nvidia due to cost of setting up UVM allocations 2020-01-27 12:41:16 -05:00
7c061e20c9 All directions of dirac operator for fast coarsening 2020-01-27 12:40:13 -05:00
e5d1c09665 Faster DhopDirAll for little dirac operator coarsening 2020-01-27 12:38:54 -05:00
8016a465ae Remove extraneous variable 2020-01-27 12:35:37 -05:00
d8b9742092 DhopDirAll for faster matrix elements of little Dirac operator 2020-01-27 12:34:54 -05:00
1bd87c35d7 Read coalescing on Nvidia 2020-01-27 12:29:56 -05:00
fa856c9669 Disable information message 2020-01-27 12:28:46 -05:00
48008e4d8b Thread coordinate creation loop 2020-01-27 12:28:16 -05:00
55cdb17691 Integer divide for blocking 2020-01-27 12:27:45 -05:00
554542b773 Merge branch 'feature/hdcr' of https://github.com/paboyle/Grid into feature/hdcr 2020-01-06 11:47:56 -05:00
03da4040e2 Make summit happy 2020-01-06 11:47:48 -05:00
e583035614 Change to interface to minimise comms in evaluating coarse space operator 2020-01-06 11:43:59 -05:00
3c3d6a94f3 Optimising the force term a bit 2020-01-04 03:16:23 -05:00
205ea4bbb2 More verbose Lanczos 2020-01-04 03:13:40 -05:00
039eb7b2eb Make the force term and coarsening multigrid more optimised 2020-01-04 03:12:17 -05:00
f7e4bd1f6d Getting more optimised 2020-01-04 03:11:53 -05:00
0afecfcae7 Nearing well optimised state 2020-01-04 03:11:19 -05:00
ba40a3f763 Alternate low pass filter option 2020-01-03 05:29:09 -05:00
aa920aa532 Improved DWF multigrid 2019-12-28 10:32:35 -05:00
c0d8e4dce5 Improved Multigrid for DWF 2019-12-28 10:32:15 -05:00
9cfd64c604 Coarse grid on GPU, not fast enough yet. Need a 10x 2019-12-17 05:24:45 -05:00
e478404291 Tuned up significantly on GPU, but another 10x in coarse space required 2019-12-17 05:03:25 -05:00
9aafd20468 Simple block project promote runs faster on GPU 2019-12-17 05:01:39 -05:00
9e15474999 Accelerator loop attempt at speed up 2019-12-14 05:28:16 -05:00
152b525a4d Typo fix 2019-12-13 22:44:42 -05:00
d18994eddc offload more of mgrid to GPU 2019-12-13 22:08:11 -05:00
736b19485e Faster set up and some dead code ifdef'ed out 2019-12-13 21:30:48 -05:00
5bfd1470ad Merge branch 'develop' into feature/hdcr 2019-12-10 21:51:06 -05:00
6957b0b58a Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-12-10 21:50:42 -05:00
d73f0b8618 Verbose for temporary debug 2019-12-10 21:50:06 -05:00
0b3a3562c3 Some MPI (summit) create sigusr2, so trap that 2019-12-10 21:49:12 -05:00
710fee5d26 Subspace setup testing code
and timing verbose
2019-12-10 21:48:42 -05:00
bab0bf2e93 Merge branch 'develop' into feature/hdcr 2019-12-10 21:47:41 -05:00
848079e8ba Merge pull request #235 from grid-test-organisation/feature/5d-improvement
MooeeInv and M5D optimisations + enable threading with nvcc
2019-12-10 21:45:03 -05:00
f2a4f13111 Must offload the Coarsened matrix if Stencil buffers are device resident 2019-12-10 19:32:12 -05:00
b9b9fcbfa0 Merge pull request #229 from nils-asmussen/feature/JacobiSmear
MSource::jacobi smear + sort file contents of Modules.hpp and modules.inc
2019-12-09 22:50:02 +00:00
bbe48998a8 sort Modules.hpp and modules.inc + add module JacobiSmear 2019-12-09 18:06:29 +00:00
6446671a9c Merge pull request #241 from nils-asmussen/fix/remQCDns_ignore_ws
Undo whitespace changes in fix/removeQCDremnants to allow comparing relevant changes
2019-12-09 18:02:21 +00:00
110373ea79 Merge pull request #204 from nils-asmussen/sha256sum_Eigen_download
bootstrap.sh: verify checksum of Eigen tar file
2019-12-09 18:01:46 +00:00
a986786192 bootstrap.sh: verify checksum of Eigen tar file if sha256sum is installed 2019-12-09 17:11:21 +00:00
edd1c924eb Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-12-09 03:53:01 -05:00
9b6b0caa55 Junk commit fix 2019-12-09 03:01:58 -05:00
2a48617ac5 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-12-09 03:00:00 -05:00
876d9c957c QMR 2019-12-09 02:59:49 -05:00
295e535f93 QMR 2019-12-09 02:59:35 -05:00
58a31f0763 QMR implemented, preserve even if not used much 2019-12-09 02:59:13 -05:00
3d2fe80780 Temporary size depends on checkerboard/uncheckerboard. The Mdir cares 2019-12-09 02:58:24 -05:00
e43fce1083 Clean up and simplify a little. 2019-12-09 02:55:45 -05:00
0dfdf80407 Logging 2019-12-09 02:54:52 -05:00
2912071f83 Add non hermitian operator 2019-12-09 02:51:53 -05:00
26605ef387 HDCR back to working 2019-12-09 02:51:01 -05:00
1e5ac576d9 Merge commit 'f7698b93ca57ea3aa4d72b133ad9ca5d1e703661' into develop
# Conflicts:
#	Hadrons/Modules.hpp
#	Hadrons/modules.inc
2019-12-06 11:59:21 +00:00
d5492b426f Hadrons: better order in module list 2019-12-06 11:56:26 +00:00
d428858c9d Merge pull request #255 from fionnoh/feature/sparseNoise
Feature/sparse noise
2019-12-06 11:43:27 +00:00
f7698b93ca corrected comments about quark line directions 2019-12-06 09:46:52 +00:00
7ce77690b8 Naming convention also applied to metadata 2019-12-05 17:38:43 +00:00
164ed9c434 Naming convention also applied to metadata 2019-12-05 17:38:00 +00:00
a54157e682 more definitions changed 2019-12-05 17:08:09 +00:00
58b6a0d8d1 changed some naming conditions to resemble rare-kaons 2019-12-05 16:56:54 +00:00
1a5e562bde only one FIMPL left! 2019-12-05 16:46:58 +00:00
45be26cf3f Merge branch 'develop' of https://github.com/fionnoh/Grid into feature/sparseNoise 2019-12-05 16:18:47 +00:00
5227ffccb7 Added James' sparse noise code and a module to use it 2019-12-05 15:50:03 +00:00
a0b47cc0be Merge pull request #254 from fionnoh/bugfix/eigenMigration
Updated Eigen URL after migration to gitlab
2019-12-05 15:26:38 +00:00
b766038810 new syntax after merge 2019-12-04 18:08:00 +00:00
cd9fd80a5d merged in develop 2019-12-04 17:12:46 +00:00
d6100cc35a Merge pull request #253 from mmphys/feature/distil
Fix phase convention adjustment error
2019-12-04 14:58:51 +00:00
29a1530510 Updated Eigen URL after migration to gitlab 2019-12-04 13:49:22 +00:00
15119eaf03 Fix phase convention adjustment error (and make no assumptions about node layout) 2019-12-04 09:59:58 +00:00
188e12ffbb Merge pull request #249 from mmphys/feature/distil
Feature/distil
2019-12-03 18:06:00 +00:00
e940f4db7e removed unused parameter parity 2019-12-03 12:01:31 +00:00
9c7f269489 typo in fimpl4 2019-12-03 11:19:54 +00:00
07feaf9531 updated ascii-doc preamble 2019-12-03 11:17:35 +00:00
7983ff2fdd Merge branch 'develop' into feature/distil
* develop:
  Change to reporting
  NVCC timer support
  Fix nocompilee under NVCC
  --enable-summit flag
  IBM summit optimisation. Synchronise in node is still between 2 halves of AC922, so could be a little faster
  Sliced propagator contraction was not producing any results because buf.size()=0
  several typos in hadrons
2019-11-30 16:47:03 +00:00
2db814f2b7 Resolve conflicts in BaryonUtils (just use latest from develop) 2019-11-29 18:19:35 +00:00
6418f06771 Add option to save the eigenvectors of the Laplacian.
If they are saved, then metadata saved are:
solverXml	Parameters for this LapEvec module instance
OperatorXml	module type and parameters (if any) for the module that created the gauge field
2019-11-29 18:06:18 +00:00
8a5576f73c cleared up how exactly q_spec has to be defined 2019-11-28 12:35:18 +00:00
997790ad24 Allow subspace setup to not converge 2019-11-26 14:04:28 -05:00
900d6fad21 fp16 mandatory. Use SFW is not available as hdw 2019-11-26 13:26:43 -05:00
799ff0c96e speed-up 2019-11-26 15:28:47 +00:00
5fd5c25114 now two separate functions for Eye and NonEye 2019-11-26 13:44:55 +00:00
62b3799c77 Merge pull request #251 from fionnoh/bugfix/WallWallMeson
MContraction::Meson bugfix
2019-11-26 12:46:03 +00:00
d1a89af8c9 Change to reporting 2019-11-22 10:49:10 -05:00
d91ba1f6cc NVCC timer support 2019-11-21 20:11:19 +00:00
f4d27e7090 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-11-21 20:09:31 +00:00
feb1ff3494 Fix nocompilee under NVCC 2019-11-21 20:03:39 +00:00
8ef6175acc Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-11-21 15:02:21 -05:00
e4399e3ee1 --enable-summit flag 2019-11-21 15:02:10 -05:00
98ea67b636 IBM summit optimisation. Synchronise in node is still between 2 halves of AC922, so could
be a little faster
2019-11-21 15:00:46 -05:00
421a4395af Sigma to Nucleon contractions 2019-11-21 17:25:37 +00:00
cf95a460a5 Sliced propagator contraction was not producing any results because buf.size()=0 2019-11-21 17:17:55 +00:00
a60e20f265 Merge pull request #250 from mmphys/hadrons-typos
several typos in hadrons
2019-11-20 17:10:08 +00:00
9261c0da89 several typos in hadrons 2019-11-20 17:06:32 +00:00
b350a24ded fixed test_distil 2019-11-18 15:29:20 +00:00
13a0db7162 Reverse changes not intended to be part of distillation release 2019-11-18 12:34:49 +00:00
18177d9709 Review changes 2019-11-18 11:59:13 +00:00
7bf42b9c0e HADRONS_ERROR 2019-11-18 10:27:35 +00:00
2d6f4e0c09 fixed issue with HADRONS_ERROR, no idea why this works 2019-11-15 13:46:47 +00:00
7f06c40107 _var -> var_ 2019-11-15 13:26:24 +00:00
9f75065205 eigen_strong_inline gone 2019-11-15 13:22:20 +00:00
271a02230e assert -> ERROR 2019-11-15 11:11:50 +00:00
b1e8b5b5ce changed default behaviour as discussed with antonin 2019-11-15 11:00:25 +00:00
25d2521d77 small stuff 2019-11-13 16:34:09 +00:00
500ef17143 beauty 2019-11-13 15:14:51 +00:00
ee9dd22643 worked on test_distil 2019-11-13 14:59:44 +00:00
a977d9901b cleanup 2019-11-13 14:52:06 +00:00
667ffb70db changed error type 2019-11-13 12:16:56 +00:00
65b3059bd7 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-11-13 11:51:14 +00:00
5238808ccd No DistilVectors specified in xml now throws an error 2019-11-13 11:50:55 +00:00
8f88fee680 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil
* 'feature/distil' of github.com:mmphys/Grid:
  made notation DPar->dp consistent over modules
2019-11-13 11:34:10 +00:00
fcc412a1c2 Remove conditional compilation to support GPU build 2019-11-13 11:32:23 +00:00
12e415330f made notation DPar->dp consistent over modules 2019-11-13 11:21:08 +00:00
66e0811317 Attempt to fix cuda build 2019-11-13 00:02:51 +00:00
55e743aad6 Streamline 2019-11-12 23:57:28 +00:00
e2ab0d671e Implement destructors 2019-11-12 23:18:37 +00:00
7a4c5dbbd5 Restoring previous version for _reduced variables 2019-11-12 22:12:35 +00:00
3f00b8f6c7 Switch to std::unique_ptr<GridCartesian> grid3d;
Remove hand-coded reference to pi - switch to <math.h> definition
2019-11-12 21:53:09 +00:00
6d7043e0c2 NamedTensor changes done 2019-11-12 17:31:42 +00:00
b0f24ec302 Test works now 2019-11-12 15:14:13 +00:00
fb2834bf82 Oops 2019-11-12 14:01:20 +00:00
78f75b0e9f Better than graffiti 2019-11-12 14:00:46 +00:00
62dd0bfe58 New parameter module compiles. Untested. 2019-11-12 13:59:53 +00:00
db952993fa envCreate problem.. 2019-11-12 12:23:34 +00:00
b8f0878981 removed most default behaviour 2019-11-11 17:49:38 +00:00
df586a142d added DistilPar-module and cleaned up some code 2019-11-11 17:29:55 +00:00
7a446d5b7f removed default filenames 2019-11-11 14:36:45 +00:00
e7d7ea4f8f added LoadNoise module 2019-11-11 12:55:45 +00:00
f8e1941327 Implemented specialisations of NamedTensor as derived classes, however this suffers a number of problems:
1) virtual functions not available in base class constructor where I'd like to use them - e.g. IndexNames
2) Must define new constructors in derived classes
... so the specialisations are fatter than I'd like. Would prefer to revert to specifying tensor name and index name defaults in template
2019-11-08 11:55:00 +00:00
65aa54804e added comments 2019-11-08 11:15:51 +00:00
293bfe17d1 added code to the noise module... 2019-11-07 14:00:40 +00:00
a8f3a111a5 added Serial RNG - code compiles but not tested! 2019-11-07 13:45:38 +00:00
5c23abe507 commented on Notation 2019-11-07 11:57:40 +00:00
22c654182a Fixes for GPU compile 2019-11-04 17:24:34 +00:00
6f0439c0e4 Remove unnecessary cast 2019-11-04 15:50:14 +00:00
4f9a7c5d76 Back out unnecessary change 2019-11-02 16:50:29 +00:00
fcd90705bc Beautification 2019-11-02 16:15:48 +00:00
4bcdb4ff95 Remove accidental check-in of local debugging 2019-11-02 15:24:12 +00:00
1c10933db1 Rationalisation of NamedTensor (Perambulator) 2019-11-02 14:58:32 +00:00
52d8d576d0 Removed SliceShare as a reusable routine 2019-11-01 20:10:51 +00:00
ada0a7a83b C++11 case comparison of named tensor index names 2019-11-01 16:05:08 +00:00
efe2f2d48b Merge branch 'develop' into feature/distil
* develop:
  Summit jsrun GPU mapping updates. Configure with --enable-jsrun
  Fixed Lanczos calling aligned alloc in threaded region hitting up against pointer-cache no-threading restrictions Fixed Lattice::reset not compiling with new Grid explicit memory region handling Fixed memory leak in Lattice::resize that occurs when data region has been previously allocated
2019-11-01 15:38:48 +00:00
45d4cf0971 Cleanup in progress 2019-11-01 15:35:07 +00:00
ac614cbc53 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-10-31 11:46:43 -04:00
ec8e060ec7 Summit jsrun GPU mapping updates. Configure with --enable-jsrun 2019-10-31 11:46:09 -04:00
5c54f27ac1 some cleanup, but hard-coded src in LapEvec unclear 2019-10-31 11:51:05 +00:00
4ed9379535 some cleanup 2019-10-31 11:45:50 +00:00
858e348a6d Cleanup of messages 2019-10-31 11:11:52 +00:00
3b3680c64e Reversed Felix's interim A2Autils.h changes ... these were finished and went into develop via a separate branch 2019-10-30 15:50:04 +00:00
2a926b3dc6 Merged latest changes from develop, in preparation for release. 2019-10-30 14:52:34 +00:00
845a045493 Merge pull request #233 from giltirn/lanczos_fix
A few run /compile / memory leak fixes
2019-10-30 10:21:59 -04:00
eb8848a071 Merge branch 'develop' into feature/distil
* develop: (27 commits)
  Update README.md
  result layout standardised, iterator size more elegant
  updated syntax in Test_hadrons_spectrum
  chroma-regression test now prints difference correctly
  baryon input strings are now pairs of pairs of gammas - still ugly!!
  second update to pull request
  Changing back interface for Gamma3pt
  Removing old debug code
  Changes to A2Autils
  suggested changes for 1st pull request implemented
  changed input parameters for easier use
  Should compile everywhere now
  changed baryon interface
  added author information
  ready for pull request
  code compiling now - still need to test
  Baryons module works in 1 of 3 cases - still need SlicedProp and Msource part!!
  thread_for caused the problems - slow for loop for now
  still bugfix
  weird bug...
  ...

# Conflicts:
#	Hadrons/Modules.hpp
#	Hadrons/modules.inc
2019-10-30 14:13:00 +00:00
f31e3278a6 Update README.md 2019-10-25 11:43:55 -04:00
ca234325bc Fix single-precision error 2019-10-23 21:49:32 +01:00
c97f780784 Merge pull request #243 from fionnoh/feature/A2A_current_insertion
Feature/a2 a current insertion
2019-10-22 13:55:53 +01:00
78bdb0ff6a Grid 2019-10-20 14:22:45 +01:00
decab587a0 PerambFileName defaults to object name if empty 2019-10-20 14:14:06 +01:00
202f025fc7 Merge pull request #242 from mmphys/feature/baryons
Feature/baryons
2019-10-16 15:06:32 +01:00
3c702b510b result layout standardised, iterator size more elegant 2019-10-15 18:48:51 +01:00
519ce19128 Fixes to enable GPU build. NB: Contractor and ContractorBenchmark still not working 2019-10-14 22:40:13 +01:00
8d166a81c0 updated syntax in Test_hadrons_spectrum 2019-10-14 13:41:08 +01:00
aa62ca9046 chroma-regression test now prints difference correctly 2019-10-10 11:07:20 +01:00
2dee4791db baryon input strings are now pairs of pairs of gammas - still ugly!! 2019-10-09 17:56:09 +01:00
548b3bf43c second update to pull request 2019-10-09 14:52:33 +01:00
a55d0ba8fe Changing back interface for Gamma3pt 2019-10-08 15:52:01 +01:00
5de9547db5 Removing old debug code 2019-10-08 15:51:28 +01:00
6a3b09cf02 Merge branch 'develop' of github.com:fionnoh/Grid into feature/A2A_current_insertion 2019-10-08 13:25:51 +01:00
10de4bfc23 Changes to A2Autils 2019-10-08 13:24:56 +01:00
2ce7f2b4d8 suggested changes for 1st pull request implemented 2019-10-08 13:19:47 +01:00
88d6ff8f1d Peter's bugfix in ImplicitlyRestartedLanczos.h
My bugfix in MomentumPhase.hpp
2019-10-07 17:36:11 +01:00
803329af99 Merge branch 'develop' into feature/distil
* develop:
  Fix after GPU merge: Phase in Free Propagator
  z2-momentum phase module

# Conflicts:
#	Hadrons/Modules/MSource/MomentumPhase.hpp
2019-10-07 13:09:52 +01:00
9d96899aa8 Doc bugfix 2019-10-07 13:05:04 +01:00
86939dbf1a Removed unnecessary function (for getting a parameter) 2019-10-04 13:59:59 +01:00
317645aaeb undo (most) whitespace changes in the two files HMC/Mobius2p1fEOFA{,_F1}.cc 2019-10-02 16:25:23 +01:00
e280ec6b0b changed input parameters for easier use 2019-10-02 16:14:06 +01:00
d5a180d914 Merge branch 'fix/removeQCDremnants' into fix/remQCDns_ignore_ws 2019-10-02 16:11:27 +01:00
d2928761dd Merge pull request #240 from guelpers/feature/bugfixafterGPUmerge
Fix after GPU merge: Phase in Free Propagator
2019-10-02 15:00:15 +01:00
f2a74c603f Merge pull request #239 from mmphys/z2_momentum
z2-momentum phase module
2019-10-02 14:57:59 +01:00
5f22810f55 Fix after GPU merge: Phase in Free Propagator 2019-10-02 14:49:35 +01:00
92e25488f8 Added MomentumPhase Hadrons module from z2_momentum branch (thankyou, Felix) so I can run Z_2 wall with momenta easily 2019-10-02 14:13:35 +01:00
89ef2b7dc2 Should compile everywhere now 2019-10-02 13:20:07 +01:00
7606554b76 Remove references to unused modules (now part of separate Baryons branch) 2019-10-02 13:16:58 +01:00
c8fc0b3e0c changed baryon interface 2019-10-02 11:36:39 +01:00
ccb5e8374b z2-momentum phase module 2019-09-30 17:36:15 +01:00
b88fd436e7 added author information 2019-09-30 17:07:46 +01:00
155bcd4ff3 ready for pull request 2019-09-30 16:58:20 +01:00
d1daab601a Merge branch 'develop' of github.com:fionnoh/Grid into feature/A2A_current_insertion
Peter's GPU branch changes merged with A2A CI code
2019-09-30 16:53:44 +01:00
e5d7910fa7 code compiling now - still need to test 2019-09-30 13:55:26 +01:00
94b9a9474c Baryons module works in 1 of 3 cases - still need SlicedProp and Msource part!! 2019-09-27 15:08:56 +01:00
bf62ec163d thread_for caused the problems - slow for loop for now 2019-09-26 13:33:49 +01:00
8415e23fc6 still bugfix 2019-09-26 11:09:09 +01:00
76c93aa44e weird bug... 2019-09-17 14:36:26 +01:00
3137628222 BaryonUtils.h is now part of Baryons 2019-09-17 13:19:20 +01:00
ce965ee6bb Cleanup tests that are no longer required 2019-09-17 13:10:59 +01:00
911fbb0f36 Cleanup modules that are no longer required 2019-09-17 13:06:52 +01:00
eb293e9909 Restore Baryons modules per develop branch 2019-09-16 20:29:37 +01:00
f548114ff6 bugfix 2019-09-16 17:55:58 +01:00
dab8c01c3d added Baryon code 2019-09-16 17:20:54 +01:00
2f3dd0703d Ensure Distillation test (Test_distil) works 2019-09-16 17:00:46 +01:00
2e963d1a78 Fix location of Grid.h and remove reference to QCD namespace 2019-09-16 15:34:47 +01:00
bf52e7cc96 Latest BaryonUtils.h from Felix + my fixes 2019-09-13 18:11:10 +01:00
61d017d0a5 Merge GPU support (upstream/develop) into distillation branch.
This compiles and looks right ... but may need some testing

* develop: (762 commits)
  Tensor ambiguous fix
  Fix for GCC preprocessor/pragma handling bug
  Trips up NVCC for reasons I dont understand on summit
  Fix GCC complaint
  Zero() change
  Force a couple of things to compile on NVCC
  Remove debug code
  nvcc error suppress
  Merge develop
  Reduction finished and hopefully fixes CI regression fail on single precision and force
  Double precision variants for summation accuracy
  Update todo list
  Freeze the seed
  Fix compiling of MSource::Gauss for single precision
  Think the reduction is now sorted and cleaned up
  Fix force term
  Printing improvement
  GPU reduction fix and also exit backtrace option
  GPU friendly
  Simplify the comms benchmark
  ...

# Conflicts:
#	Grid/communicator/SharedMemoryMPI.cc
#	Grid/qcd/action/fermion/WilsonKernelsAsm.cc
#	Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h
#	Grid/qcd/smearing/StoutSmearing.h
#	Hadrons/Modules.hpp
#	Hadrons/Utilities/Contractor.cc
#	Hadrons/modules.inc
#	tests/forces/Test_dwf_force_eofa.cc
#	tests/forces/Test_dwf_gpforce_eofa.cc
2019-09-13 13:30:00 +01:00
04a661cafe Remove unused modules BC2 and Baryon2 2019-09-10 14:49:24 +01:00
a7fa86dc29 MooeeInv improvement for DW EOFA + comments 2019-09-05 12:05:21 +01:00
0c1efa5235 pass OpenMP flag to host compiler 2019-09-03 12:12:25 +01:00
fdd9b14e82 speed up MooeeInvDag for DWF EOFA 2019-09-02 14:49:51 +01:00
e66669d300 fast MooeeInv for EOFA 2019-09-02 14:26:13 +01:00
0efaf3c4fa access M5D coeffs through pointers 2019-09-02 11:33:00 +01:00
3ef519aaa4 fast MooeeInv 2019-09-02 11:18:14 +01:00
b473405652 Tensor ambiguous fix 2019-08-29 09:36:41 -05:00
114ebb7914 Fixed Lanczos calling aligned alloc in threaded region hitting up against pointer-cache no-threading restrictions
Fixed Lattice::reset not compiling with new Grid explicit memory region handling
Fixed memory leak in Lattice::resize that occurs when data region has been previously allocated
2019-08-26 16:47:44 -04:00
9b7a6d197f Fix for GCC preprocessor/pragma handling bug 2019-08-23 14:37:46 +01:00
59cd7f3b70 Trips up NVCC for reasons I dont understand on summit 2019-08-23 06:03:49 -04:00
28d6be2a4e Fix GCC complaint 2019-08-22 18:56:37 +01:00
6b6c5aa626 remove namespace QCD from directory tests 2019-08-20 15:35:36 +01:00
9210b0aa6e remove namespace QCD from directory HMC 2019-08-20 15:21:23 +01:00
ad01290545 remove remnants of the namespace QCD 2019-08-19 20:30:33 +01:00
25150eb2e0 3pt contraction now takes a list of gammas 2019-08-15 12:09:30 +01:00
95f66cc93c Merge branch 'feature/gpu-port' into develop 2019-08-15 02:19:31 +01:00
d566637cec Merge branch 'develop' of github.com:fionnoh/Grid into feature/A2A_current_insertion 2019-08-07 12:11:40 +01:00
51bed48cd2 added selfcontract module 2019-08-05 17:46:42 +01:00
b875edceab Merge branch 'feature/distil' of https://github.com/mmphys/Grid into feature/distil
Conflicts:
	Grid/qcd/utils/BaryonUtils.h
	Hadrons/Modules/MContraction/Baryon2.hpp
2019-08-05 14:19:43 +01:00
29df60c0cb some debugging stuff 2019-08-05 14:10:04 +01:00
8d97e2a02a Say which A2AMatrix is being loaded, and which contraction is being performed (m of n) 2019-08-02 19:23:18 +01:00
ed23f6be20 Remove blank line from log 2019-08-02 15:59:18 +01:00
cad76827b0 Be consistent about separator usage. Log start / stop / duration 2019-08-02 15:47:20 +01:00
310867d46a Additional option to specify the separator used between terms in correlator 2019-08-02 11:25:29 +01:00
e598178d94 TODO: Felix, please fix. I commented this out because of compiler errors 2019-08-01 20:51:51 +01:00
723457d467 Contractor updates ready for test on Tesseract:
1) Move definitions of serialisable objects into header for re-use by external programs/utilities
2) Add "-s" switch for "Simple" correlators, i.e. only include A2AMatrix info for the actual fields included in each contraction
2019-08-01 20:35:55 +01:00
6f40021842 Fixed compiler errors: TODO: Felix, please validate 2019-08-01 19:57:59 +01:00
622d5eaa3e Merge branch 'feature/distil' of https://github.com/mmphys/Grid into feature/distil 2019-07-30 13:47:22 +01:00
e66d48c142 second way to compute baryons - qdp style 2019-07-30 13:46:59 +01:00
f5ad4f3de8 Added the ability to write a version of the validated XML file excluding any of the module IDs supplied in a separate exclude file 2019-07-26 19:46:55 +01:00
e7050a7aed Support gamma structure names that have trailing white space 2019-07-19 11:58:56 +01:00
e138bc7204 debug output 2019-07-19 11:16:35 +01:00
6d4fb35d84 Ready for testing 2019-07-19 10:33:03 +01:00
56cefadf9b gamma matrices as input 2019-07-18 17:46:43 +01:00
9d82855c5d bugfix in Baryonutils 2019-07-18 15:45:43 +01:00
97d61f2564 bugfix in Baryonutils 2019-07-18 14:57:10 +01:00
11a8668d19 bugfix in Baryonutils 2019-07-18 14:44:55 +01:00
cded7670d0 new utils for baryons 2019-07-18 14:29:04 +01:00
feb029fb66 new utils for baryons 2019-07-18 14:24:16 +01:00
5a62ebe7b1 general baryons case added 2019-07-15 15:26:30 +01:00
fa747173d1 Debugging references were to l-values, so added const to stop errors 2019-07-14 11:08:00 +01:00
12afb0395f Debugging transposeSpin - seems just not to be implemented for Lattice<x> 2019-07-11 17:42:26 +01:00
ec4aa978ab why cant I spinTranspose 2019-07-11 14:01:41 +01:00
7bc4a06f3f This is probably what you want ... 2019-07-10 12:29:33 +01:00
cd659525e1 You probably want to add this to the build. And you may need to do a bootstrap 2019-07-10 12:08:37 +01:00
dc2240d2d8 why does sliceSum in Nucleon.hpp not work 2019-07-10 11:34:16 +01:00
98cf20cf06 continued work on baryons 2019-07-09 17:42:36 +01:00
cc3346073e continued work on baryons 2019-07-09 17:30:32 +01:00
3848da7c50 added nucleon module (non-distillation) 2019-07-08 17:43:14 +01:00
b7d0cf6751 bugfix in diquark sum / baryons 2019-07-04 22:06:37 +01:00
2c1a077369 continued on baryons 2019-07-02 17:55:28 +01:00
ae3abbe53d Added the ability for Perambulator module to save unsmeared sinks through the addition of two optional parameters:
UnsmearedSinkFileName: If present, specifies the filename to write to
UnsmearedSinkMultiFile: defaults to true to write each sink vector to a different file, but can be set to 0 for a single file
2019-07-01 17:28:27 +01:00
5fc0188205 started saving sinks 2019-07-01 14:51:59 +01:00
67690df3bd Changes needed to have a current insertion on every second time slice - avoids unnecessary contractions 2019-06-28 15:18:28 +08:00
ce29b18dc9 New modules for loading in MFs as diskvectors and producing propagators from 4 quark contractions 2019-06-27 13:46:06 +08:00
421a0a8a36 Changes to A2Autils, A2AMatrix and DiskVector code that is needed for Hadrons 4 quark contraction module 2019-06-27 13:45:20 +08:00
ac530636ca A2Aloop bugfix 2019-06-27 13:44:47 +08:00
2d940a598c Inserted four extra parameters just to make this test compile. Needs to be fixed properly 2019-06-19 10:37:50 +01:00
c28c5fc61b Inserted four extra parameters just to make this test compile. Needs to be fixed properly 2019-06-19 10:31:41 +01:00
015340d60c Elided superfluous copy on write 2019-06-19 09:37:03 +01:00
9a8a63467e BC2 now runs. setup() runs twice, which had resulted in doubling up of momenta. Also fixed initialisation of momentum phases. 2019-06-12 15:25:59 +01:00
fe72dc099b Upgrade to Mojave forced me to reinstall MacPorts. These are the ports I installed to get Grid working 2019-06-04 16:12:24 +01:00
54edb9906e Housekeeping. #include <Grid.h> ---> #include <Grid/Grid.h> 2019-06-03 15:20:46 +01:00
9ff459816f ReadBinary needs to do case insensitive name comparison (since I changed the default case of perambulator column names) 2019-06-01 13:50:27 +01:00
eb737daeb5 Merge branch 'develop' into feature/distil
* develop: (34 commits)
  Hadrons: EMLepton: Wall source
  Revert "cleaning up Kl2 contraction"
  cleaning up Kl2 contraction
  possibility to save/load schedules directly from the application parameters
  moving VERSION file to the empty ChangeLog one, this create compilation problems with #include <version> in recent versions of LLVM and case-insensitive FS (typically macOS)
  Added precision tuning to Hadrons parameterfile writing
  Kl2 QED cleanup
  Added ZFIMPL to SeqGamma
  Added ZFIMPL to SeqConserved module
  F1 ensemble running with 96%~ acceptance etc..
  Make detection of HPE 8600 automatic
  Added variables that were missing from wall source setup
  Exposed a coulomb/landau enum to the gauge fixing module
  Coulomb gauge added as an option
  More logging, timing, and 4d/5d logic for eigpack gauge transforms
  Added gauge transform option to eigpack IO
  Hadrons: Lepton Propagator for kl2, sign swap for antiperiodic boundary
  A2A Lepton-Meson Field contraction
  Verbose
  Iteration range fix
  ...
2019-05-31 18:20:43 +01:00
8ce7ebdca3 fixed contraction issue 2019-05-17 10:52:55 +01:00
435653490e fixed contraction issue 2019-05-17 10:50:15 +01:00
10a052d695 3 issues preventing compilation under clang. Marked these with FELIX_ISSUE and made minimal change to make compile (as fix not obvious) 2019-05-17 09:59:01 +01:00
acd5a01b65 some work on baryons 2019-05-16 15:11:50 +01:00
ec7d96ce3b Merge branch 'develop' into feature/distil
* develop:
  Hadron WeakEye and A2ALoop bug fixes, and WWVVContraction bug fix
  DiskVector: fix of memory bug triggering segfault when the cache is accessed following a certain pattern
  MFermion::GaugeProp fix for 4d fields
2019-05-14 13:10:40 +01:00
c16916cc45 Multiple local slice fixes 2019-05-06 10:35:42 +01:00
a865caf0d2 Forgot a const in IndexName only version of NamedTensor constructor 2019-05-03 22:17:25 +01:00
9ae4d369f3 Use the definition of the Perambulator Index names given in Hadrons::MDistil 2019-05-03 22:00:50 +01:00
ec24a1f828 Fixed 2 bugs in LapEvec: 1) InsertLocalSlice 2) ensure convergence assertion stops entire machine 2019-05-03 16:03:56 +01:00
0efe63f6fa 3D smearing fix 2019-05-02 19:37:59 +01:00
b7ead6c16a Fixed bug: iff stout smearing disabled then gauge field uninitialised 2019-05-02 18:20:49 +01:00
62692b68b9 I'd forgotten that Intel '17 doesn't like auto var{value}; syntax 2019-05-01 20:45:16 +01:00
311c35a15c Looking for fixes for Intel '17 compiler errors. std::cout << complex number ? 2019-05-01 18:22:08 +01:00
a3fe57f430 NamedTensor writes to tag NamedTensor by default (not filename) - so still usable in case user renames file.
Also tweaked tensor index name checking (which is used to ensure tensor is correct type)
2019-05-01 18:11:37 +01:00
8dc0587621 Post Michael / Felix review. Ready for Peter / Antonin review 2019-05-01 13:04:51 +01:00
cfe5fa7a35 1) Don't write Laplacian eigenvectors to disk 2) Add a test that loads perambulators from disk 2019-05-01 09:50:23 +01:00
e72e26c899 Get rid of unnecessary multiFile options 2019-05-01 08:53:08 +01:00
334f29becb Fairly close to ready for release. Felix and I to review, then submit for release 2019-04-30 23:53:57 +01:00
e56ead55ef WIP 2019-04-30 14:41:48 +01:00
d74d443d1b Pre-release cleanup in progress 2019-04-29 22:18:29 +01:00
4203105104 Part-way through release tidy-up 2019-04-29 18:40:38 +01:00
ac19c0e04f This will need to be removed eventually, but should save us fiddling about with each release 2019-04-29 09:20:08 +01:00
b48ca8a6ef Merge branch 'develop' into feature/distil
* develop: (36 commits)
  Mobius 2+1f sign off.
  Integrator logging on by default
  RHMC for mobius
  HMC make file
  Update
  Simple check
  Simple checks
  Monius HMC
  Changes locally
  Power method
  Momentum rescaling
  Bounds checking
  Bounds checking
  Scale momentum convention to CPS/UKQCD MD time
  Add bounds checking
  Updated documentation after Peter's review. 1) Removed version numbers from Grid dependencies 2) Explained in a little more detail how to use Xcode to build Grid and Hadrons libraries
  Remove bundled Eigen stuff
  Fix typo so it matches develop
  Remove bundled source from my local repository
  Slightly generalize interface to SchurRedBlackBase and derived solver classes so we can pass forecasted initial guesses in EOFA heatbath correctly
  ...
2019-04-29 08:37:39 +01:00
c48ae4f3ad 1) Only the boss should write the perambulator - possibly was a source of intermittent corruption?
2) Implemented and test a perambulator conversion utility in Test_distil (commented out near the start of main)
2019-04-28 23:24:57 +01:00
fb74de0798 Making sure Hdf5 is an optional dependency (default to binary writer if not present) 2019-04-28 20:23:44 +01:00
adc1eaee68 Switched to Hdf5 format for perambulators. Ready for first test on Tesseract. 2019-04-28 17:53:42 +01:00
5aca4e8670 Just realised that the trace is at every lattice site, so moved the check for no smearing further up 2019-04-26 17:23:18 +01:00
e223d0b99f Need to validate range about which exp^iQ is considered unity 2019-04-26 16:00:35 +01:00
2e220456d3 First attempt at minimising smearing 2019-04-26 15:54:05 +01:00
4333d97958 fixed parameter 2019-04-26 14:29:21 +01:00
55c9c45d4b Merge branch 'feature/distil' of https://github.com/mmphys/Grid into feature/distil 2019-04-26 14:28:01 +01:00
e70e03f560 started stout smearing for small w 2019-04-26 14:27:40 +01:00
ff5e2e0f47 Debug output fix. Meant to print the rho matrix for stout smearing ... not the address of the function that creates it 2019-04-26 12:30:41 +01:00
4f3d1ea6e8 Two heads are better than one. Combined effort and hopefully spatial smearing now fixed! 2019-04-26 12:18:11 +01:00
b1768ba820 Urgh! 2019-04-26 10:04:27 +01:00
3ac5a69a57 Ready to test spatial smearing (again) 2019-04-26 08:54:30 +01:00
50a74eaea3 Doesn't compile. Does it still need to be maintained? 2019-04-26 08:33:10 +01:00
8419fbb335 Renamed PerambLight module. Check with Felix whether Test_24 and Test_tesseract still need to be maintained 2019-04-26 08:23:15 +01:00
23a9b93cda More dependencies for Distil.hpp move and (C) 2019 only 2019-04-26 07:39:05 +01:00
ecdc3ddebf Moved Distil.hpp and added GNU license to all files 2019-04-26 07:24:56 +01:00
606698511c Seems we've not been keeping the test up-to-date 2019-04-22 19:03:24 +01:00
a97b814f0c Remove redundancy in LapEvec filename 2019-04-19 14:09:36 +01:00
7214681e11 Spatial smearing doesn't work yet. Fixed inconsistency in naming of perambulator in PerambLight.hpp 2019-04-19 13:54:25 +01:00
143b75956c Stout smearing 3D fixes. Changed LapEvec to perform spatial smearing only 2019-04-19 11:54:02 +01:00
4a4203c610 fixed stout smearing for now 2019-04-18 19:10:49 +01:00
2b598294c9 added distil source module 2019-04-18 17:47:09 +01:00
d111c70c38 Merge branch 'develop' into feature/distil
* develop:
  Make sure Grid::Serializable can write Eigen Tensors to output streams. NB: 1) The Eigen package defines operator<< for Eigen tensors, but this format is different, hence Grid::Serializable::WriteMember 2) For simplification, the contents are written in memory order. I.e. Different results will be obtained depending on whether the tensor is row- or column-major
  ... this time without the new Distillation modules ...
  Eigen tensor serialisation fixes after Antonin's review
  Iterator added. Will wait for review comments before finalising.
  Fix build with Intel '17 compiler, i.e. workaround incorrect auto types for c++ style definitions. E.g. assuming T::rank is an int, then objects defined like so:     const auto rank{T::rank}; should also be int. Unfortunately, Intel '17 instead defines them to be std::initializer_list<int>, then proceeds to complain where these variables are used that they cannot be converted to int. NB: This was fixed under Intel '18
  Pushed paboyle's changes: Updates for clang happy
  Merge paboyle's no compile in single precision Intel 2019 fix
  Eigen::Tensor serialisation. Tested on single and double precision builds
2019-04-10 13:14:24 +01:00
ed2427d5f7 Make sure Grid::Serializable can write Eigen Tensors to output streams. NB:
1) The Eigen package defines operator<< for Eigen tensors, but this format is different, hence Grid::Serializable::WriteMember
2) For simplification, the contents are written in memory order. I.e. Different results will be obtained depending on whether the tensor is row- or column-major
2019-04-06 15:37:53 +01:00
ea2f34de7b Updated documentation after Peter's review.
1) Removed version numbers from Grid dependencies
2) Explained in a little more detail how to use Xcode to build Grid and Hadrons libraries
2019-04-06 13:37:47 +01:00
63dc0fa7e9 Fixed memory leak ... without breaking semantics of prior code. Possibly should change the semantics? For Peter / Antonin to comment 2019-04-04 16:00:17 +01:00
5e6104e683 Merge branch 'feature/distil' of https://github.com/mmphys/Grid into feature/distil 2019-04-04 12:13:35 +01:00
25e4ee3a49 3D Stout smearing added 2019-04-04 12:13:16 +01:00
4161429dcc Serialisation fixes after Antonin's review 2019-04-03 22:30:07 +01:00
b5eb97206b Merge branch 'develop' into feature/distil
* develop:
  MGauge::GaugeFix use standard convention for fields
  fix bug: MGauge::GaugeFix should not modify its input
  add gauge transformation matrix as output to module MGauge/GaugeFix
2019-04-03 16:24:49 +01:00
0da906cf66 Merge branch 'develop' into feature/distil
* develop:
  Documentation for using Grid with Xcode on Mac OS
2019-03-27 23:08:29 +00:00
3decb5f886 Merge branch 'develop' of github.com:paboyle/Grid into feature/distil
* 'develop' of github.com:paboyle/Grid:
  endianness fix in resilient IO
2019-03-27 20:39:23 +00:00
faa8bb9bc6 Fixed funny memory leak 2019-03-27 17:55:52 +00:00
4c02ed6d0c Updated GridXcode documentation 2019-03-27 13:54:39 +00:00
f757b80e1c tried to fix mem leak 2019-03-27 12:00:36 +00:00
b8581be1da : 2019-03-27 11:59:06 +00:00
9fce1263be Fixed bug in LapEvec if machine running spread-out in time 2019-03-26 13:24:39 +00:00
ae565b006a Compiling in single-precision now works 2019-03-25 22:56:01 +00:00
8502660023 Begin fixes for single precision 2019-03-25 20:40:05 +00:00
625a97a466 cosmetic 2019-03-25 18:16:04 +00:00
bce2766fef Merge branch 'feature/distil' of https://github.com/mmphys/Grid into feature/distil 2019-03-25 16:38:42 +00:00
ce501afec6 bugfix 2019-03-25 16:38:25 +00:00
1d10a3b3de Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil
* 'feature/distil' of github.com:mmphys/Grid:
  bugfix
2019-03-25 15:50:57 +00:00
d1e02f50ff Added iterator for Eigen tensors 2019-03-25 15:50:29 +00:00
48b03c4590 bugfix 2019-03-25 15:45:35 +00:00
b3b9e608e1 added new module for noises 2019-03-25 14:13:03 +00:00
4e87cbd400 Fix build with Intel '17 compiler, i.e. workaround incorrect auto types for c++ style definitions.
E.g. assuming T::rank is an int, then objects defined like so:
const auto rank{T::rank};
should also be int. Unfortunately, Intel '17 instead defines them to be std::initializer_list<int>, then proceeds to complain where these variables are used that they cannot be converted to int. NB: This was fixed under Intel '18
2019-03-23 09:28:41 +00:00
4fc045b563 added module to load perambulators from disk 2019-03-22 13:50:47 +00:00
fbf286b0e3 added Spin dilution 2019-03-22 13:30:11 +00:00
9dc3fe9922 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil
* 'feature/distil' of github.com:mmphys/Grid:
  modules list
2019-03-22 13:00:06 +00:00
6c9029fab7 Merge branch 'feature/distil' of https://github.com/mmphys/Grid into feature/distil 2019-03-22 12:41:56 +00:00
8700dd4d0d modules list 2019-03-22 12:41:53 +00:00
9c16391e55 Merge branch 'develop' into feature/distil
* develop:
  Updates for clang happy
2019-03-22 12:08:50 +00:00
685d9bafef Merge branch 'develop' into feature/distil
* develop:
  No compile in single precisoin Intel 2019 fix
2019-03-21 16:36:48 +00:00
d2d26b302d Removed the module we don't need from modules.inc (so make now works)
i.e. removed Modules/MDistil/PerambMultipleSolves.hpp from Hadrons/modules.inc
2019-03-20 22:59:20 +00:00
88cb004731 Fixed single-precision issues in Test_serialisation 2019-03-20 22:05:16 +00:00
a66bb8acba fixed possible memory leak 2019-03-20 14:41:36 +00:00
4ae35000a9 removed module which we do not need 2019-03-20 13:36:57 +00:00
02b96b4602 Fixed module list (messed up when I merged from develop) 2019-03-20 11:20:40 +00:00
11dded61e8 Merge branch 'develop' into feature/distil
* develop: (29 commits)
  precision fix
  Updates after review with Peter.
  Wilson clover multi grid for lime lattice
  Recommendations for Traits classes
  Hadrons: uninitialised pointer fix (might have been harmless)
  Hadrons: beware of the nasty uninitialised twists
  Smearing test. Test on free field.
  Smearing for quark observables
  Smearing
  Hadrons: XML validator utility
  display relative norm during field IO norm check
  possibility to set a build number
  IO norm check on relative norm
  Output field norm check during IO
  Hadrons: random vector utility module I/O
  quieter initialisation
  fix patch command for eigen in bootstrap.sh
  Mres changes and gauge xform mat changes
  Hadrons: 32 bit I/O directly in Lanczos module
  Hadrons: copyright update
  ...

# Conflicts:
#	Grid/tensors/Tensor_traits.h
#	Hadrons/Modules.hpp
#	Hadrons/modules.inc
2019-03-20 10:35:36 +00:00
24cf3b9df5 Ignore Version.h as it's created by automake/autoconf 2019-03-19 12:12:39 +00:00
9c8aa2047d Put GridXcode doc in subdirectory 2019-03-19 07:33:19 +00:00
204cfa1c5a Added documentation for Grid using Xcode 2019-03-19 07:28:29 +00:00
fe6845d38b Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-03-18 14:44:18 +00:00
bff4eeec41 Added disclaimer on half-precision types 2019-03-18 12:15:25 +00:00
d1fe4dce33 new idea to get multiple perambulators 2019-03-15 10:28:02 +00:00
50ca3101de bug in multiSolves and new test prog 2019-03-13 17:25:55 +00:00
0faf40e207 last commit did not compile - fxied this 2019-03-13 13:24:18 +00:00
5313e44d11 some cleanup 2019-03-13 13:15:12 +00:00
6bb9b67c93 externalised gauge field reading to hadrons module 2019-03-13 12:09:12 +00:00
a0405c6d84 PerambMultipleSolves.hpp compiles (not had time to test) 2019-03-12 14:01:29 +00:00
c2a3231cdf added testing module for multiple perambulators 2019-03-11 18:05:39 +00:00
5fb2ee89bb modified test so that it runs 2019-03-08 16:50:21 +00:00
608a98d870 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-03-08 16:28:34 +00:00
2df396380d solver is now external 2019-03-08 16:28:21 +00:00
64ba664637 changed debug options 2019-03-08 12:25:00 +00:00
4a70b2ffd4 Aslash insertions work now? 2019-03-08 12:23:22 +00:00
2d659015ff Serialisation is fully functional. Ready for review. 2019-03-08 00:30:43 +00:00
e63019ac50 Tensor serialisation is fully functional 2019-03-08 00:01:45 +00:00
dde118fed9 added everythong to compute sequential aslash fields 2019-03-07 17:36:53 +00:00
1538bf8c34 added everythong to compute sequential aslash fields 2019-03-07 17:36:22 +00:00
4abc498ae3 Merge branch 'feature/distil' of https://github.com/mmphys/Grid into feature/distil 2019-03-07 15:34:10 +00:00
93dfbfbfcd added module to compute perambulator from a solve 2019-03-07 15:33:50 +00:00
f9e273d4bf Making sure same as Traits-recommend 2019-03-07 14:33:04 +00:00
584fa0a633 Changes after review with Peter 2019-03-07 12:53:34 +00:00
73cdca3973 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-03-06 13:55:51 +00:00
d716f8a0c9 new module for baryon contraction 2019-03-06 13:55:36 +00:00
aa24f04911 Changed EigenIO to use GridTypeMapper type traits 2019-03-06 12:55:05 +00:00
1880e6d12d Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-03-06 11:16:34 +00:00
4a00513e65 Moving Eigen trensor utilities to separate (optional) header 2019-03-06 11:16:22 +00:00
7718ee199a efficient baryon test program 2019-03-05 17:16:42 +00:00
d7c7bff065 added output for source meson fields on all tsrc 2019-03-05 12:01:55 +00:00
802675f062 baryons should compile now... 2019-03-04 17:31:21 +00:00
d56d8c923f Replaced an error in A2AUtils.h that was stopping the build with an assert() 2019-03-02 00:36:53 +00:00
00c3c6fc54 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-03-02 00:24:47 +00:00
b3d4ba8657 Fixed issues with Eigen Tensor serialisation. Fixed issues with precision to text streams 2019-03-02 00:24:37 +00:00
a4d578bd5d baryons work now??? 2019-03-01 14:44:39 +00:00
7653649389 baryons working now 2019-03-01 12:57:41 +00:00
a344a2227e Fixing build errors 2019-02-28 20:30:16 +00:00
4b9200b35c Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-28 19:06:36 +00:00
91be028507 Still one issue on write 2019-02-28 19:06:25 +00:00
3b05f91f5c Prototype for template traits recommendations 2019-02-28 19:04:44 +00:00
8804271339 efficient baryons compile! 2019-02-28 16:32:40 +00:00
6d9f377913 added parity 2019-02-28 11:05:31 +00:00
18b603c5ae simple but hopefully efficient baryon field 2019-02-28 10:27:05 +00:00
e9784572af baryons... 2019-02-27 17:51:25 +00:00
f168a9e7ee continued with baryons 2019-02-26 16:41:52 +00:00
50b6db75da Merge branch 'feature/distil' of https://github.com/mmphys/Grid into feature/distil 2019-02-26 15:57:09 +00:00
df065f1d57 first test configs 2019-02-26 15:57:01 +00:00
578eb177e7 Tweaked format and memory use on Xml format. Still crashes (out of memory) on large read on my laptop 2019-02-25 22:03:21 +00:00
81b3f3d2ca Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-25 15:39:07 +00:00
7c7ffa3b10 Added text read/write 2019-02-25 15:38:47 +00:00
1f098ceecf Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-25 15:36:23 +00:00
c47c1a2472 started working on baryons - this time efficiently 2019-02-25 15:36:11 +00:00
ec45b16840 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-25 14:10:34 +00:00
9288019789 Added Xml IO (has one deficiency: the format for multi-dimensional data is flat) 2019-02-25 14:10:24 +00:00
9c04139362 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-25 12:40:44 +00:00
cfc14a7432 more adjustments to test 2019-02-25 12:40:32 +00:00
31e40c26fa Oops. Forgot to delete SortNode (prevented linking) 2019-02-25 11:35:33 +00:00
3f2fe5c7e7 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-25 11:18:34 +00:00
76b6e8a01e first tesseract test 2019-02-25 11:18:25 +00:00
f9543982e4 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-25 11:07:43 +00:00
3c9f2d4106 Chunking layout reasonably efficient. Looks for small prime factors of each dimension, falling back to approximate size if needed. 2019-02-25 11:07:29 +00:00
cad26a736e quick&dirty fix for g5*field 2019-02-22 17:05:16 +00:00
4f2ac433f1 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-22 16:31:26 +00:00
f9e505108b test Aslash 2019-02-22 16:31:17 +00:00
d2aced13da Merge branch 'feature/distil' of https://github.com/mmphys/Grid into feature/distil 2019-02-22 16:30:40 +00:00
03d031d623 tesserct test 2019-02-22 16:30:22 +00:00
44a2d4854a Ensured Hdf5 chunk size always less than 4GB 2019-02-22 15:14:32 +00:00
292ff33f7f Removed issue with std::string_literal 2019-02-21 16:51:05 +00:00
55886cf9db ran make_module_list.sh 2019-02-21 16:14:13 +00:00
c640923159 Fixed reference to depth from test 2019-02-21 15:48:52 +00:00
752530f352 Gotten rid of c++17 in Test_serialisation.cc 2019-02-21 14:43:07 +00:00
34b9450fc9 Gotten rid of c++17 2019-02-21 14:22:48 +00:00
5d6462b706 bugfix 2019-02-21 11:13:10 +00:00
f70c5b004a some cleanup in Baryon2pt 2019-02-20 12:56:13 +00:00
5bb9de9242 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-19 17:37:37 +00:00
982a24514b Binary IO also implemented and tested 2019-02-19 17:37:21 +00:00
97c6f770b4 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-19 17:22:41 +00:00
4522f1e831 separated final 2pt Contraction 2019-02-19 17:22:30 +00:00
c14547ddbe EigenIO writing rationalised. All indices (trivial or not) written 2019-02-19 16:12:55 +00:00
63c97db414 Prior to rationalising 2 versions of BaseIO::write (scalar and vector) 2019-02-19 13:29:08 +00:00
6ebb32ffbf Rationalised Test_serialisation 2019-02-18 21:40:53 +00:00
07c97cb424 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-18 17:12:36 +00:00
04b58de5de Read-back working. 2019-02-18 17:12:27 +00:00
6e822b7201 added sign for contraction sum 2019-02-18 15:21:13 +00:00
625ccfcd72 continued baryon contraction code 2019-02-18 13:10:34 +00:00
c77069244d Nearly ready. Just finishing off readback and compare 2019-02-18 08:55:50 +00:00
9815ddb853 Started read routines. Introduced readMultiDim and tested I didnt break anything 2019-02-16 19:30:33 +00:00
74a3a5b825 Fixed existing bug in Hdf5Reader::readDefault for std::vector<U> 2019-02-16 18:45:46 +00:00
00e9416e0a Tweak to initialisation example 2019-02-16 17:08:22 +00:00
b6803a070a Making sure I understand row-major vs column-major ordering 2019-02-16 16:18:28 +00:00
bfd2770657 started on baryon flavour sums 2019-02-15 15:51:46 +00:00
668b1e77c7 small changes 2019-02-15 15:31:53 +00:00
e51744260f Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-15 14:32:26 +00:00
e0987d7d81 first contraction version done 2019-02-15 14:32:17 +00:00
26b94d7bda Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-15 13:53:00 +00:00
df0c8b5d84 Test of Eigen slices 2019-02-15 13:52:49 +00:00
a111d814db Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-15 10:47:41 +00:00
e8bd8767c0 Get rid of declarations inside constexpr functions. if constexpr warning remains 2019-02-15 10:06:15 +00:00
8cb96cb693 Hmmm lots of warnings depending on compiler ... 2019-02-14 19:17:12 +00:00
b9bee45277 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-14 19:05:47 +00:00
bee24655cd Finalising traits 2019-02-14 19:05:35 +00:00
886c895f81 baryon field structure is now eigentensor - started on contractions for 2pt functions 2019-02-14 16:44:54 +00:00
59c8cc1588 Minor bugfix 2019-02-13 22:11:24 +00:00
11467a994d Enough for tonight 2019-02-13 21:48:35 +00:00
9f2ca98dfc enseble can now be specified in LapEvec 2019-02-13 13:54:31 +00:00
bf434b6bef Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-13 12:14:18 +00:00
41ff592515 Moved serialisation tests into Test_serialisation 2019-02-13 12:14:01 +00:00
48ec937c55 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-13 11:48:57 +00:00
65731546b7 merge... 2019-02-13 11:48:34 +00:00
76c6a6772a Added rank_non_trivial 2019-02-12 22:15:55 +00:00
e7048231bc Working version with additional Grid traits pre: review by Antonin 2019-02-12 13:59:48 +00:00
49babeab19 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-11 23:26:46 +00:00
fb2cb3015e Writing of Eigen::Tensor of grid objects now works (for Hdf5) 2019-02-11 23:26:18 +00:00
53f45d2c7e Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-11 17:39:55 +00:00
d889cebc60 unique string is now used 2019-02-11 17:39:42 +00:00
9a225235b6 Can write both fixed and dynamic sized tensors (small tidy) 2019-02-11 17:15:38 +00:00
dff7d9261d Can write both fixed and dynamic sized tensors 2019-02-11 15:47:40 +00:00
6f2663edf6 Serialisation of an object containing an Eigen::Tensor works for Hdf5. Still quite a lot of tidying up to do. 2019-02-10 23:19:20 +00:00
d5024bd07e Hdf5 writing of scalar (i.e. no Grid subtypes) Eigen::Tensor works. But issues when adding Eigen::Tensor to serialisable object. 2019-02-10 15:33:16 +00:00
9c4189484a Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-09 17:12:43 +00:00
3720103f41 Adding Eigen::Tensor still WIP 2019-02-09 17:12:36 +00:00
c4d27ee30f added parity operator to baryon fields 2019-02-08 15:49:52 +00:00
d26a5dce12 bugfix 2019-02-08 14:37:09 +00:00
5843a943d9 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-08 14:36:08 +00:00
c1341b8ed2 bugfix 2019-02-08 14:33:06 +00:00
6a4515d0cd baryons have now the correct (?) structure - also easier! 2019-02-07 12:27:57 +00:00
a0a39e4b00 Fixed initialisation of vector of Complex 2019-02-06 21:56:44 +00:00
b9fb16077c Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-06 21:37:54 +00:00
4b3c566c89 ../tests/hadrons/Test_hadrons_distil.cc 2019-02-06 21:36:46 +00:00
cbd2dfe53f Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-06 12:56:49 +00:00
6cdb1eb62c BContraction now computes what might be a baryon function, but probably isn't 2019-02-06 12:23:52 +00:00
ed7175076b Turned off warning of unused variable line 150 2019-02-06 09:32:13 +00:00
27677b3870 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-06 09:25:39 +00:00
7423f5af1a Examples of how to access Grid Tensors 2019-02-06 09:25:24 +00:00
21d6dbe0b6 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-02-05 17:32:39 +00:00
1ee84509b5 added baryons project - not working yet 2019-02-05 17:32:26 +00:00
57e57d162f Removed Eigen::DontAlign attribute 2019-02-05 12:50:28 +00:00
5b0870bb19 Added Scalar_ length and Scalar_Unit_Size to Perambulator file for validation 2019-02-05 09:07:05 +00:00
7f5354630a Updated perambulator binary format to save payload in big endian format on disk 2019-02-04 23:07:59 +00:00
008ac6b5ae Permabulator is read back from disk if it exists instead of being created 2019-02-04 12:06:32 +00:00
c7aa4e0c1f Perambulator filename can be specified in xml. NB: Perambulator binary format now includes data size in bytes to avoid type mismatches. 2019-02-04 11:30:30 +00:00
43bd918a47 Logging tweak 2019-02-03 21:48:50 +00:00
7eda54bb87 Only write indices with dimesion!=1 2019-02-03 20:58:58 +00:00
bd75b843fa Added checksum to data 2019-02-03 20:31:42 +00:00
8865bf5d7c Implemented perambulator read/write ... but in binary format. Will switch to Hdf5 when I have Antonins feedback 2019-02-03 17:05:19 +00:00
caabbcd951 minor change 2019-02-01 17:50:18 +00:00
48528c5b1d Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil
Added index names to Perambulator
2019-02-01 15:31:27 +00:00
f7b90a0c14 Added index names to perambulator 2019-02-01 15:20:35 +00:00
a9848becb0 unsmeared sinks can now be computed - new test program available 2019-02-01 13:23:42 +00:00
7cc13f48d5 added some TODO comments; needs discussion 2019-01-31 16:54:11 +00:00
b6b267fd4b Fixed new test parameters 2019-01-31 15:11:12 +00:00
9671a61bb2 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-01-31 15:07:45 +00:00
d7dc617746 Switched perambulator to sue Eigen::Tensor (file write temporarily excluded) 2019-01-31 15:06:52 +00:00
32cb2e1a9a Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-01-31 13:01:31 +00:00
3d31113337 added test t5 to compute meson fields of different quarks. Different nvec are allowed. 2019-01-31 13:01:16 +00:00
48b6f7e6ad Changed PerambLight<FIMPL> to PerambLight<GIMPL> 2019-01-31 12:37:00 +00:00
0da411fe60 LapEvec fixes 2019-01-31 12:28:38 +00:00
d7b9ed199d PerambLight fixes 2019-01-31 12:24:32 +00:00
7e74f7bec4 tsrc != 0 now works 2019-01-31 11:35:05 +00:00
dae7b30b92 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-01-30 21:16:30 +00:00
f7e4661ca0 Fixed grid3d leak in PerambLight 2019-01-30 21:16:09 +00:00
7b66197534 meson fields are now the same 2019-01-30 18:03:34 +00:00
c3273eff20 agreement up to laph vectors 2019-01-30 11:20:22 +00:00
67a3d7aeed added debug output, perambulators now agree up to 8 digits 2019-01-29 16:24:59 +00:00
d8831fe925 changed parameters to match Test_Distil 2019-01-29 13:40:26 +00:00
c7ceff6a21 Switched to Gauge field (GIMPL) 2019-01-28 12:28:35 +00:00
5580b3a7d1 bugfix in DistilVectors 2019-01-28 12:24:47 +00:00
33d8fb2dd9 Default 2019-01-25 19:21:12 +00:00
9f6f776460 ensured there is a default test to run 2019-01-25 19:14:22 +00:00
84fe36d084 meson functions work until to be saved 2019-01-25 17:26:43 +00:00
3438dde8df test prog now computes everything up to meson fields 2019-01-25 15:19:18 +00:00
aea49bc349 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-01-25 13:44:30 +00:00
9ef6f9878e test works up to perambulators now 2019-01-25 13:44:19 +00:00
708ca8585a Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-01-25 13:26:56 +00:00
d15bf4b8e1 Added trajectory number to output file 2019-01-25 13:26:48 +00:00
7496da0987 bugfix in prambLight 2019-01-25 13:08:56 +00:00
2568f5b925 bugfix in prambLight 2019-01-25 12:37:18 +00:00
577cdf1d72 Simplified tests 2019-01-24 18:50:18 +00:00
f92ed659a7 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-01-24 16:30:28 +00:00
dfb7fb1d9f LapEvec test works on --grid 4.4.4.8 2019-01-24 16:30:13 +00:00
a4c1ab6147 all modules linked in test prog 2019-01-24 16:12:19 +00:00
cf85f0388d Still debugging eigenvector parameters 2019-01-24 13:26:05 +00:00
00b0f75b0d Eigenvectors created. Still need to correctly set parameters for test. 2019-01-24 12:44:06 +00:00
b45586e81c Discovered bug root cause. setup() is called multiple times. Now ready to copy-paste the LapEvec code 2019-01-23 21:17:56 +00:00
2c7e6bf58b Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-01-23 15:20:06 +00:00
7c5a06f6d0 Trying to work out why LapEvec constructor not being called 2019-01-23 15:19:51 +00:00
068ef85b05 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-01-23 15:08:24 +00:00
a6ab742fdb added perambs to test 2019-01-23 13:58:20 +00:00
2062a8d578 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-01-23 13:00:20 +00:00
3d3e8f4f9f Structured objects passed into LapEvec 2019-01-23 12:59:55 +00:00
2756f16a5e created test prog for perambs 2019-01-23 12:49:20 +00:00
d7908c33de moved hard-coded parameters in DistilVectors to module input 2019-01-23 11:32:53 +00:00
4cc2ebc9e4 moved hard-coded parameters to module input 2019-01-23 11:26:07 +00:00
b8afa7314c Merge branch 'feature/distil' of https://github.com/mmphys/Grid into feature/distil 2019-01-23 10:51:23 +00:00
be5605931c merge 2019-01-23 10:51:09 +00:00
09fa821510 Added remaining methods to Permabulator 2019-01-22 17:59:55 +00:00
f45d2d5dcc perambLight done, but SliceShare and Write does not work yet 2019-01-22 15:52:26 +00:00
0a82fae45c moved perambulator definition to shared header file 2019-01-22 15:06:45 +00:00
46b05aa9c5 cleaned up, deleted commented out old code 2019-01-22 13:48:44 +00:00
813c1ab1f1 Merge branch 'feature/distil' of https://github.com/mmphys/Grid into feature/distil 2019-01-22 13:28:09 +00:00
b1c27a141d DistilVectors complete and compiling - not tested at all! 2019-01-22 13:27:51 +00:00
81bb361299 Test program ready 2019-01-22 13:19:39 +00:00
79d533550d continued on DistilVectors.hpp 2019-01-21 16:45:31 +00:00
b8c106f320 working on DistilVectors, initialisation done and compiles 2019-01-21 16:04:18 +00:00
b74492a805 Merge branch 'feature/distil' of github.com:mmphys/Grid into feature/distil 2019-01-21 10:40:01 +00:00
c93a43f158 Added test program 2019-01-21 10:39:28 +00:00
0ff410ae19 copied perambulato code into PerambLight.hpp 2019-01-18 17:47:41 +00:00
ced30b61e2 added phi vectors - still commented out and does not compile otherwise 2019-01-18 16:38:13 +00:00
2b782df290 Merge branch 'feature/distil' of https://github.com/mmphys/Grid into feature/distil 2019-01-18 15:58:51 +00:00
f0f1ba0307 uses evec4d now 2019-01-18 15:58:10 +00:00
2343e621e6 Bananas 2019-01-18 13:32:27 +00:00
2568504821 small change 2019-01-18 13:23:03 +00:00
b821dde020 Initial version 2019-01-18 13:14:28 +00:00
ae3b053334 Initial version 2019-01-18 13:10:02 +00:00
137 changed files with 8996 additions and 2012 deletions

View File

@ -35,6 +35,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/approx/Zolotarev.h>
#include <Grid/algorithms/approx/Chebyshev.h>
#include <Grid/algorithms/approx/JacobiPolynomial.h>
#include <Grid/algorithms/approx/Remez.h>
#include <Grid/algorithms/approx/MultiShiftFunction.h>
#include <Grid/algorithms/approx/Forecast.h>

View File

@ -1,3 +1,14 @@
// blockZaxpy in blockPromote - 3s, 5%
// noncoalesced linalg in Preconditioner ~ 3s 5%
// Lanczos tuning or replace 10-20s ~ 25%, open ended
// setup tuning 5s ~ 8%
// -- e.g. ordermin, orderstep tunables.
// MdagM path without norm in LinOp code. few seconds
// Mdir calc blocking kernels
// Fuse kernels in blockMaskedInnerProduct
// preallocate Vectors in Cayley 5D ~ few percent few seconds
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -34,15 +45,36 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid);
// Masked, blocked inner product of two fine-grid fields.
//
// Steps, as performed below:
//   1. fine_inner     = localInnerProduct(fineX, fineY)
//   2. fine_inner_msk = fine_inner * FineMask      (via mult)
//   3. CoarseInner    = blockSum(fine_inner_msk)
//
// Parameters:
//   CoarseInner - output; receives the result of blockSum.
//   FineMask    - field with the same element type as innerProduct(vobj,vobj),
//                 multiplied into the local inner product before the block sum.
//   fineX,fineY - the two fine-grid fields whose local inner product is taken.
//
// NOTE(review): blockSum is defined elsewhere; presumably it accumulates the
// fine sites belonging to each coarse site -- confirm against its definition.
template<class vobj,class CComplex>
inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner,
const Lattice<decltype(innerProduct(vobj(),vobj()))> &FineMask,
const Lattice<vobj> &fineX,
const Lattice<vobj> &fineY)
{
// Element type produced by innerProduct on two vobj values.
typedef decltype(innerProduct(vobj(),vobj())) dotp;
// NOTE(review): `coarse` is not referenced again in this function.
GridBase *coarse(CoarseInner.Grid());
GridBase *fine (fineX.Grid());
// Temporaries live on the fine grid; fine_inner takes its checkerboard from fineX.
Lattice<dotp> fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard();
Lattice<dotp> fine_inner_msk(fine);
// Multiply could be fused with innerProduct
// Single block sum kernel could do both masks.
fine_inner = localInnerProduct(fineX,fineY);
// Apply the mask to the local inner product.
mult(fine_inner_msk, fine_inner,FineMask);
// Reduce the masked fine-grid result into the coarse output.
blockSum(CoarseInner,fine_inner_msk);
}
class Geometry {
// int dimension;
public:
int npoint;
std::vector<int> directions ;
std::vector<int> displacements;
Geometry(int _d) {
int base = (_d==5) ? 1:0;
// make coarse grid stencil for 4d , not 5d
@ -52,10 +84,10 @@ public:
directions.resize(npoint);
displacements.resize(npoint);
for(int d=0;d<_d;d++){
directions[2*d ] = d+base;
directions[2*d+1] = d+base;
displacements[2*d ] = +1;
displacements[2*d+1] = -1;
directions[d ] = d+base;
directions[d+_d] = d+base;
displacements[d ] = +1;
displacements[d+_d]= -1;
}
directions [2*_d]=0;
displacements[2*_d]=0;
@ -63,7 +95,7 @@ public:
//// report back
std::cout<<GridLogMessage<<"directions :";
for(int d=0;d<npoint;d++) std::cout<< directions[d]<< " ";
std::cout <<std::endl;
std::cout<<std::endl;
std::cout<<GridLogMessage<<"displacements :";
for(int d=0;d<npoint;d++) std::cout<< displacements[d]<< " ";
std::cout<<std::endl;
@ -115,10 +147,10 @@ public:
void Orthogonalise(void){
CoarseScalar InnerProd(CoarseGrid);
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
blockOrthogonalise(InnerProd,subspace);
std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
std::cout << GridLogMessage <<" Block Gramm-Schmidt pass 1"<<std::endl;
blockOrthogonalise(InnerProd,subspace);
// std::cout << GridLogMessage <<" Block Gramm-Schmidt pass 2"<<std::endl; // Really have to do twice? Yuck
// blockOrthogonalise(InnerProd,subspace);
// std::cout << GridLogMessage <<" Gramm-Schmidt checking orthogonality"<<std::endl;
// CheckOrthogonal();
}
@ -128,7 +160,7 @@ public:
for(int i=0;i<nbasis;i++){
blockProject(iProj,subspace[i],subspace);
eProj=Zero();
thread_for(ss, CoarseGrid->oSites(),{
accelerator_for(ss, CoarseGrid->oSites(),1,{
eProj[ss](i)=CComplex(1.0);
});
eProj=eProj - iProj;
@ -146,66 +178,14 @@ public:
void CreateSubspaceRandom(GridParallelRNG &RNG){
for(int i=0;i<nbasis;i++){
random(RNG,subspace[i]);
std::cout<<GridLogMessage<<" norm subspace["<<i<<"] "<<norm2(subspace[i])<<std::endl;
}
Orthogonalise();
}
/*
virtual void CreateSubspaceLanczos(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis)
{
// Run a Lanczos with sloppy convergence
const int Nstop = nn;
const int Nk = nn+20;
const int Np = nn+20;
const int Nm = Nk+Np;
const int MaxIt= 10000;
RealD resid = 1.0e-3;
Chebyshev<FineField> Cheb(0.5,64.0,21);
ImplicitlyRestartedLanczos<FineField> IRL(hermop,Cheb,Nstop,Nk,Nm,resid,MaxIt);
// IRL.lock = 1;
FineField noise(FineGrid); gaussian(RNG,noise);
FineField tmp(FineGrid);
std::vector<RealD> eval(Nm);
std::vector<FineField> evec(Nm,FineGrid);
int Nconv;
IRL.calc(eval,evec,
noise,
Nconv);
// pull back nn vectors
for(int b=0;b<nn;b++){
subspace[b] = evec[b];
std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
hermop.Op(subspace[b],tmp);
std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(tmp)<<std::endl;
noise = tmp - sqrt(eval[b])*subspace[b] ;
std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<" ; [ M - Lambda ]_"<<b<<" vec_"<<b<<" = " <<norm2(noise)<<std::endl;
noise = tmp + eval[b]*subspace[b] ;
std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<" ; [ M - Lambda ]_"<<b<<" vec_"<<b<<" = " <<norm2(noise)<<std::endl;
}
Orthogonalise();
for(int b=0;b<nn;b++){
std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
}
}
*/
virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
RealD scale;
ConjugateGradient<FineField> CG(1.0e-2,10000);
ConjugateGradient<FineField> CG(1.0e-2,100,false);
FineField noise(FineGrid);
FineField Mn(FineGrid);
@ -232,21 +212,316 @@ public:
subspace[b] = noise;
}
Orthogonalise();
}
////////////////////////////////////////////////////////////////////////////////////////////////
// World of possibilities here. But have tried quite a lot of experiments (250+ jobs run on Summit)
// and this is the best I found
////////////////////////////////////////////////////////////////////////////////////////////////
#if 1
// Fill subspace[0..nn-1] with normalised vectors built from one Gaussian noise vector.
//
//  RNG         : random number generator used to draw the initial noise
//  hermop      : operator; both hermop.Op and hermop.HermOp are applied below
//  nn          : number of subspace vectors that must be produced (asserted at the end)
//  hi, lo      : interval handed to the initial Chebyshev filter
//  orderfilter : order of the initial Chebyshev filter
//  ordermin    : first recurrence index n at which a vector is stored
//  orderstep   : spacing in n between stored vectors
//  filterlo    : replaces lo as the lower interval edge for the recurrence stage
//
// Stage 1 stores Cheb(lo,hi,orderfilter) applied to the noise as subspace[0].
// Stage 2 runs the three-term recurrence T_{n+1} = 2 y T_n - T_{n-1}, with
// y = xscale*HermOp + mscale, starting from the stage-1 vector, and stores the
// normalised T_{n+1} whenever (n-ordermin) is a multiple of orderstep.
virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,
int nn,
double hi,
double lo,
int orderfilter,
int ordermin,
int orderstep,
double filterlo
) {
RealD scale;
FineField noise(FineGrid);
FineField Mn(FineGrid);
FineField tmp(FineGrid);
// New normalised noise
gaussian(RNG,noise);
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
// Initial matrix element
// (diagnostic only: prints norm2 of hermop.Op applied to the noise)
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
// b counts the subspace vectors written so far
int b =0;
{
// Filter
Chebyshev<FineField> Cheb(lo,hi,orderfilter);
Cheb(hermop,noise,Mn);
// normalise
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
subspace[b] = Mn;
hermop.Op(Mn,tmp);
std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
b++;
}
// Generate a full sequence of Chebyshevs
{
// From here on the lower interval edge is filterlo, and the recurrence is
// seeded with the filtered, normalised vector from the stage above.
lo=filterlo;
noise=Mn;
FineField T0(FineGrid); T0 = noise;
FineField T1(FineGrid);
FineField T2(FineGrid);
FineField y(FineGrid);
// Tnm / Tn / Tnp point at T_{n-1}, T_n and the slot that receives T_{n+1}
FineField *Tnm = &T0;
FineField *Tn = &T1;
FineField *Tnp = &T2;
// Tn=T1 = (xscale M + mscale)in
RealD xscale = 2.0/(hi-lo);
RealD mscale = -(hi+lo)/(hi-lo);
hermop.HermOp(T0,y);
T1=y*xscale+noise*mscale;
// Upper bound chosen so that, for orderstep>1, exactly nn-1 values of n
// (ordermin, ordermin+orderstep, ...) trigger a store below.
for(int n=2;n<=ordermin+orderstep*(nn-2);n++){
hermop.HermOp(*Tn,y);
auto y_v = y.View();
auto Tn_v = Tn->View();
auto Tnp_v = Tnp->View();
auto Tnm_v = Tnm->View();
const int Nsimd = CComplex::Nsimd();
// Per site: first rescale y in place, y <- xscale*y + mscale*Tn,
// then form the next term, Tnp <- 2*y - Tnm.
// NOTE(review): accelerator_forNB is presumably the non-blocking launch
// variant - confirm the read of *Tnp below is ordered after this kernel.
accelerator_forNB(ss, FineGrid->oSites(), Nsimd, {
coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
});
// Possible more fine grained control is needed than a linear sweep,
// but huge productivity gain if this is simple algorithm and not a tunable
// m stays 1 below ordermin, so nothing is stored there provided orderstep>1.
// NOTE(review): with orderstep==1 the test (1%1)==0 is also true for
// n<ordermin, and orderstep==0 is a modulo by zero - confirm callers
// always pass orderstep>1.
int m =1;
if ( n>=ordermin ) m=n-ordermin;
if ( (m%orderstep)==0 ) {
Mn=*Tnp;
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
subspace[b] = Mn;
hermop.Op(Mn,tmp);
std::cout<<GridLogMessage << n<<" filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
b++;
}
// Cycle pointers to avoid copies
FineField *swizzle = Tnm;
Tnm =Tn;
Tn =Tnp;
Tnp =swizzle;
}
}
// Every one of the nn requested vectors must have been written.
assert(b==nn);
}
#endif
#if 0
virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,
int nn,
double hi,
double lo,
int orderfilter,
int ordermin,
int orderstep,
double filterlo
) {
RealD scale;
FineField noise(FineGrid);
FineField Mn(FineGrid);
FineField tmp(FineGrid);
FineField combined(FineGrid);
// New normalised noise
gaussian(RNG,noise);
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
// Initial matrix element
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
int b =0;
#define FILTERb(llo,hhi,oorder) \
{ \
Chebyshev<FineField> Cheb(llo,hhi,oorder); \
Cheb(hermop,noise,Mn); \
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; \
subspace[b] = Mn; \
hermop.Op(Mn,tmp); \
std::cout<<GridLogMessage << oorder<< " Cheb filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; \
b++; \
}
// JacobiPolynomial<FineField> Cheb(0.002,60.0,1500,-0.5,3.5); \
RealD alpha=-0.8;
RealD beta =-0.8;
#define FILTER(llo,hhi,oorder) \
{ \
Chebyshev<FineField> Cheb(llo,hhi,oorder); \
/* JacobiPolynomial<FineField> Cheb(0.0,60.0,oorder,alpha,beta);*/\
Cheb(hermop,noise,Mn); \
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; \
subspace[b] = Mn; \
hermop.Op(Mn,tmp); \
std::cout<<GridLogMessage << oorder<< "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; \
b++; \
}
#define FILTERc(llo,hhi,oorder) \
{ \
Chebyshev<FineField> Cheb(llo,hhi,oorder); \
Cheb(hermop,noise,combined); \
}
double node = 0.000;
FILTERb(lo,hi,orderfilter);// 0
// FILTERc(node,hi,51);// 0
noise = Mn;
int base = 0;
int mult = 100;
FILTER(node,hi,base+1*mult);
FILTER(node,hi,base+2*mult);
FILTER(node,hi,base+3*mult);
FILTER(node,hi,base+4*mult);
FILTER(node,hi,base+5*mult);
FILTER(node,hi,base+6*mult);
FILTER(node,hi,base+7*mult);
FILTER(node,hi,base+8*mult);
FILTER(node,hi,base+9*mult);
FILTER(node,hi,base+10*mult);
FILTER(node,hi,base+11*mult);
FILTER(node,hi,base+12*mult);
FILTER(node,hi,base+13*mult);
FILTER(node,hi,base+14*mult);
FILTER(node,hi,base+15*mult);
assert(b==nn);
}
#endif
#if 0
virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,
int nn,
double hi,
double lo,
int orderfilter,
int ordermin,
int orderstep,
double filterlo
) {
RealD scale;
FineField noise(FineGrid);
FineField Mn(FineGrid);
FineField tmp(FineGrid);
FineField combined(FineGrid);
// New normalised noise
gaussian(RNG,noise);
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
// Initial matrix element
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
int b =0;
{
Chebyshev<FineField> JacobiPoly(0.005,60.,1500);
// JacobiPolynomial<FineField> JacobiPoly(0.002,60.0,1500,-0.5,3.5);
//JacobiPolynomial<FineField> JacobiPoly(0.03,60.0,500,-0.5,3.5);
// JacobiPolynomial<FineField> JacobiPoly(0.00,60.0,1000,-0.5,3.5);
JacobiPoly(hermop,noise,Mn);
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
subspace[b] = Mn;
hermop.Op(Mn,tmp);
std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
b++;
// scale = std::pow(norm2(tmp),-0.5); tmp=tmp*scale;
// subspace[b] = tmp; b++;
// }
}
#define FILTER(lambda) \
{ \
hermop.HermOp(subspace[0],tmp); \
tmp = tmp - lambda *subspace[0]; \
scale = std::pow(norm2(tmp),-0.5); \
tmp=tmp*scale; \
subspace[b] = tmp; \
hermop.Op(subspace[b],tmp); \
std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; \
b++; \
}
// scale = std::pow(norm2(tmp),-0.5); tmp=tmp*scale;
// subspace[b] = tmp; b++;
// }
FILTER(2.0e-5);
FILTER(2.0e-4);
FILTER(4.0e-4);
FILTER(8.0e-4);
FILTER(8.0e-4);
FILTER(2.0e-3);
FILTER(3.0e-3);
FILTER(4.0e-3);
FILTER(5.0e-3);
FILTER(6.0e-3);
FILTER(2.5e-3);
FILTER(3.5e-3);
FILTER(4.5e-3);
FILTER(5.5e-3);
FILTER(6.5e-3);
// FILTER(6.0e-5);//6
// FILTER(7.0e-5);//8
// FILTER(8.0e-5);//9
// FILTER(9.0e-5);//3
/*
// FILTER(1.0e-4);//10
FILTER(2.0e-4);//11
// FILTER(3.0e-4);//12
// FILTER(4.0e-4);//13
FILTER(5.0e-4);//14
FILTER(6.0e-3);//4
FILTER(7.0e-4);//1
FILTER(8.0e-4);//7
FILTER(9.0e-4);//15
FILTER(1.0e-3);//2
FILTER(2.0e-3);//2
FILTER(3.0e-3);//2
FILTER(4.0e-3);//2
FILTER(5.0e-3);//2
FILTER(6.0e-3);//2
FILTER(7.0e-3);//2
FILTER(8.0e-3);//2
FILTER(1.0e-2);//2
*/
std::cout << GridLogMessage <<"Jacobi filtering done" <<std::endl;
assert(b==nn);
}
#endif
};
// Fine Object == (per site) type of fine field
// nbasis == number of deflation vectors
template<class Fobj,class CComplex,int nbasis>
class CoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > > {
public:
typedef iVector<CComplex,nbasis > siteVector;
typedef iVector<CComplex,nbasis > siteVector;
typedef Lattice<CComplex > CoarseComplexField;
typedef Lattice<siteVector> CoarseVector;
typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
typedef iMatrix<CComplex,nbasis > Cobj;
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
typedef Lattice<Fobj > FineField;
@ -255,11 +530,11 @@ public:
////////////////////
Geometry geom;
GridBase * _grid;
int hermitian;
CartesianStencil<siteVector,siteVector,int> Stencil;
std::vector<CoarseMatrix> A;
///////////////////////
// Interface
@ -271,64 +546,136 @@ public:
conformable(_grid,in.Grid());
conformable(in.Grid(),out.Grid());
// RealD Nin = norm2(in);
SimpleCompressor<siteVector> compressor;
double comms_usec = -usecond();
Stencil.HaloExchange(in,compressor);
comms_usec += usecond();
auto in_v = in.View();
auto out_v = in.View();
thread_for(ss,Grid()->oSites(),{
siteVector res = Zero();
siteVector nbr;
auto out_v = out.View();
typedef LatticeView<Cobj> Aview;
Vector<Aview> AcceleratorViewContainer;
for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View());
Aview *Aview_p = & AcceleratorViewContainer[0];
const int Nsimd = CComplex::Nsimd();
typedef decltype(coalescedRead(in_v[0])) calcVector;
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
GridStopWatch ArithmeticTimer;
int osites=Grid()->oSites();
// double flops = osites*Nsimd*nbasis*nbasis*8.0*geom.npoint;
// double bytes = osites*nbasis*nbasis*geom.npoint*sizeof(CComplex);
double usecs =-usecond();
// assert(geom.npoint==9);
accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
int ss = sss/nbasis;
int b = sss%nbasis;
calcComplex res = Zero();
calcVector nbr;
int ptype;
StencilEntry *SE;
int lane=SIMTlane(Nsimd);
for(int point=0;point<geom.npoint;point++){
SE=Stencil.GetEntry(ptype,point,ss);
if(SE->_is_local&&SE->_permute) {
permute(nbr,in_v[SE->_offset],ptype);
} else if(SE->_is_local) {
nbr = in_v[SE->_offset];
if(SE->_is_local) {
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute,lane);
} else {
nbr = Stencil.CommBuf()[SE->_offset];
nbr = coalescedRead(Stencil.CommBuf()[SE->_offset],lane);
}
synchronise();
for(int bb=0;bb<nbasis;bb++) {
res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
}
auto A_point = A[point].View();
res = res + A_point[ss]*nbr;
}
vstream(out_v[ss],res);
coalescedWrite(out_v[ss](b),res,lane);
});
return norm2(out);
usecs +=usecond();
double nrm_usec=-usecond();
RealD Nout= norm2(out);
nrm_usec+=usecond();
/*
std::cout << GridLogMessage << "\tNorm " << nrm_usec << " us" <<std::endl;
std::cout << GridLogMessage << "\tHalo " << comms_usec << " us" <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << usecs << " us" <<std::endl;
std::cout << GridLogMessage << "\t mflop/s " << flops/usecs<<std::endl;
std::cout << GridLogMessage << "\t MB/s " << bytes/usecs<<std::endl;
*/
return Nout;
};
RealD Mdag (const CoarseVector &in, CoarseVector &out){
// // corresponds to Petrov-Galerkin coarsening
// return M(in,out);
// corresponds to Galerkin coarsening
CoarseVector tmp(Grid());
G5C(tmp, in);
M(tmp, out);
G5C(out, out);
return norm2(out);
RealD Mdag (const CoarseVector &in, CoarseVector &out)
{
if(hermitian) {
// corresponds to Petrov-Galerkin coarsening
return M(in,out);
} else {
// corresponds to Galerkin coarsening
CoarseVector tmp(Grid());
G5C(tmp, in);
M(tmp, out);
G5C(out, out);
return norm2(out);
}
};
void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
conformable(_grid,in.Grid());
conformable(in.Grid(),out.Grid());
void MdirComms(const CoarseVector &in)
{
SimpleCompressor<siteVector> compressor;
Stencil.HaloExchange(in,compressor);
auto point = [dir, disp](){
if(dir == 0 and disp == 0)
return 8;
else
return (4 * dir + 1 - disp) / 2;
}();
}
void MdirCalc(const CoarseVector &in, CoarseVector &out, int point)
{
conformable(_grid,in.Grid());
conformable(_grid,out.Grid());
typedef LatticeView<Cobj> Aview;
Vector<Aview> AcceleratorViewContainer;
for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View());
Aview *Aview_p = & AcceleratorViewContainer[0];
auto out_v = out.View();
auto in_v = in.View();
thread_for(ss,Grid()->oSites(),{
const int Nsimd = CComplex::Nsimd();
typedef decltype(coalescedRead(in_v[0])) calcVector;
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
int ss = sss/nbasis;
int b = sss%nbasis;
calcComplex res = Zero();
calcVector nbr;
int ptype;
StencilEntry *SE;
int lane=SIMTlane(Nsimd);
SE=Stencil.GetEntry(ptype,point,ss);
if(SE->_is_local) {
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute,lane);
} else {
nbr = coalescedRead(Stencil.CommBuf()[SE->_offset],lane);
}
synchronise();
for(int bb=0;bb<nbasis;bb++) {
res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
}
coalescedWrite(out_v[ss](b),res,lane);
});
#if 0
accelerator_for(ss,Grid()->oSites(),1,{
siteVector res = Zero();
siteVector nbr;
int ptype;
@ -343,43 +690,112 @@ public:
} else {
nbr = Stencil.CommBuf()[SE->_offset];
}
synchronise();
auto A_point = A[point].View();
res = res + A_point[ss]*nbr;
res = res + Aview_p[point][ss]*nbr;
vstream(out_v[ss],res);
out_v[ss]=res;
});
#endif
}
void MdirAll(const CoarseVector &in,std::vector<CoarseVector> &out)
{
this->MdirComms(in);
int ndir=geom.npoint-1;
if ((out.size()!=ndir)&&(out.size()!=ndir+1)) {
std::cout <<"MdirAll out size "<< out.size()<<std::endl;
std::cout <<"MdirAll ndir "<< ndir<<std::endl;
assert(0);
}
for(int p=0;p<ndir;p++){
MdirCalc(in,out[p],p);
}
};
void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
this->MdirComms(in);
int ndim = in.Grid()->Nd();
//////////////
// 4D action like wilson
// 0+ => 0
// 0- => 1
// 1+ => 2
// 1- => 3
// etc..
//////////////
// 5D action like DWF
// 1+ => 0
// 1- => 1
// 2+ => 2
// 2- => 3
// etc..
auto point = [dir, disp, ndim](){
if(dir == 0 and disp == 0)
return 8;
else if ( ndim==4 ) {
return (4 * dir + 1 - disp) / 2;
} else {
return (4 * (dir-1) + 1 - disp) / 2;
}
}();
MdirCalc(in,out,point);
};
void Mdiag(const CoarseVector &in, CoarseVector &out){
Mdir(in, out, 0, 0); // use the self coupling (= last) point of the stencil
void Mdiag(const CoarseVector &in, CoarseVector &out)
{
int point=geom.npoint-1;
MdirCalc(in, out, point); // No comms
};
CoarsenedMatrix(GridCartesian &CoarseGrid) :
CoarsenedMatrix(GridCartesian &CoarseGrid, int hermitian_=0) :
_grid(&CoarseGrid),
geom(CoarseGrid._ndimension),
hermitian(hermitian_),
Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0),
A(geom.npoint,&CoarseGrid)
A(geom.npoint,&CoarseGrid)
{
};
void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
Aggregation<Fobj,CComplex,nbasis> & Subspace){
Aggregation<Fobj,CComplex,nbasis> & Subspace)
{
typedef Lattice<typename Fobj::tensor_reduced> FineComplexField;
typedef typename Fobj::scalar_type scalar_type;
FineField iblock(FineGrid); // contributions from within this block
FineField oblock(FineGrid); // contributions from outwith this block
FineComplexField one(FineGrid); one=scalar_type(1.0,0.0);
FineComplexField zero(FineGrid); zero=scalar_type(0.0,0.0);
std::vector<FineComplexField> masks(geom.npoint,FineGrid);
FineComplexField imask(FineGrid); // contributions from within this block
FineComplexField omask(FineGrid); // contributions from outwith this block
FineComplexField evenmask(FineGrid);
FineComplexField oddmask(FineGrid);
FineField phi(FineGrid);
FineField tmp(FineGrid);
FineField zz(FineGrid); zz=Zero();
FineField Mphi(FineGrid);
FineField Mphie(FineGrid);
FineField Mphio(FineGrid);
std::vector<FineField> Mphi_p(geom.npoint,FineGrid);
Lattice<iScalar<vInteger> > coor(FineGrid);
Lattice<iScalar<vInteger> > coor (FineGrid);
Lattice<iScalar<vInteger> > bcoor(FineGrid);
Lattice<iScalar<vInteger> > bcb (FineGrid); bcb = Zero();
CoarseVector iProj(Grid());
CoarseVector oProj(Grid());
CoarseVector SelfProj(Grid());
CoarseComplexField iZProj(Grid());
CoarseComplexField oZProj(Grid());
CoarseScalar InnerProd(Grid());
// Orthogonalise the subblocks over the basis
@ -388,69 +804,117 @@ public:
// Compute the matrix elements of linop between this orthonormal
// set of vectors.
int self_stencil=-1;
for(int p=0;p<geom.npoint;p++){
for(int p=0;p<geom.npoint;p++)
{
int dir = geom.directions[p];
int disp = geom.displacements[p];
A[p]=Zero();
if( geom.displacements[p]==0){
self_stencil=p;
}
Integer block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]);
LatticeCoordinate(coor,dir);
///////////////////////////////////////////////////////
// Work out even and odd block checkerboarding for fast diagonal term
///////////////////////////////////////////////////////
if ( disp==1 ) {
bcb = bcb + div(coor,block);
}
if ( disp==0 ) {
masks[p]= Zero();
} else if ( disp==1 ) {
masks[p] = where(mod(coor,block)==(block-1),one,zero);
} else if ( disp==-1 ) {
masks[p] = where(mod(coor,block)==(Integer)0,one,zero);
}
}
evenmask = where(mod(bcb,2)==(Integer)0,one,zero);
oddmask = one-evenmask;
assert(self_stencil!=-1);
for(int i=0;i<nbasis;i++){
phi=Subspace.subspace[i];
std::cout<<GridLogMessage<<"("<<i<<").."<<std::endl;
// std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl;
linop.OpDirAll(phi,Mphi_p);
linop.OpDiag (phi,Mphi_p[geom.npoint-1]);
for(int p=0;p<geom.npoint;p++){
Mphi = Mphi_p[p];
int dir = geom.directions[p];
int disp = geom.displacements[p];
Integer block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]);
if ( (disp==-1) || (!hermitian ) ) {
LatticeCoordinate(coor,dir);
if ( disp==0 ){
linop.OpDiag(phi,Mphi);
}
else {
linop.OpDir(phi,Mphi,dir,disp);
}
////////////////////////////////////////////////////////////////////////
// Pick out contributions coming from this cell and neighbour cell
////////////////////////////////////////////////////////////////////////
if ( disp==0 ) {
iblock = Mphi;
oblock = Zero();
} else if ( disp==1 ) {
oblock = where(mod(coor,block)==(block-1),Mphi,zz);
iblock = where(mod(coor,block)!=(block-1),Mphi,zz);
} else if ( disp==-1 ) {
oblock = where(mod(coor,block)==(Integer)0,Mphi,zz);
iblock = where(mod(coor,block)!=(Integer)0,Mphi,zz);
} else {
assert(0);
}
Subspace.ProjectToSubspace(iProj,iblock);
Subspace.ProjectToSubspace(oProj,oblock);
// blockProject(iProj,iblock,Subspace.subspace);
// blockProject(oProj,oblock,Subspace.subspace);
auto iProj_v = iProj.View() ;
auto oProj_v = oProj.View() ;
auto A_p = A[p].View();
auto A_self = A[self_stencil].View();
thread_for(ss, Grid()->oSites(),{
////////////////////////////////////////////////////////////////////////
// Pick out contributions coming from this cell and neighbour cell
////////////////////////////////////////////////////////////////////////
omask = masks[p];
imask = one-omask;
for(int j=0;j<nbasis;j++){
if( disp!= 0 ) {
A_p[ss](j,i) = oProj_v[ss](j);
}
A_self[ss](j,i) = A_self[ss](j,i) + iProj_v[ss](j);
blockMaskedInnerProduct(oZProj,omask,Subspace.subspace[j],Mphi);
auto iZProj_v = iZProj.View() ;
auto oZProj_v = oZProj.View() ;
auto A_p = A[p].View();
auto A_self = A[self_stencil].View();
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
// if( disp!= 0 ) { accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });}
// accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_self[ss](j,i),A_self(ss)(j,i)+iZProj_v(ss)); });
}
}
}
///////////////////////////////////////////
// Faster alternate self coupling.. use hermiticity to save 2x
///////////////////////////////////////////
{
mult(tmp,phi,evenmask); linop.Op(tmp,Mphie);
mult(tmp,phi,oddmask ); linop.Op(tmp,Mphio);
{
auto tmp_ = tmp.View();
auto evenmask_ = evenmask.View();
auto oddmask_ = oddmask.View();
auto Mphie_ = Mphie.View();
auto Mphio_ = Mphio.View();
accelerator_for(ss, FineGrid->oSites(), Fobj::Nsimd(),{
coalescedWrite(tmp_[ss],evenmask_(ss)*Mphie_(ss) + oddmask_(ss)*Mphio_(ss));
});
}
blockProject(SelfProj,tmp,Subspace.subspace);
auto SelfProj_ = SelfProj.View();
auto A_self = A[self_stencil].View();
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{
for(int j=0;j<nbasis;j++){
coalescedWrite(A_self[ss](j,i), SelfProj_(ss)(j));
}
});
}
}
if(hermitian) {
std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl;
ForceHermitian();
}
// AssertHermitian();
// ForceDiagonal();
}
#if 0
///////////////////////////
@ -473,17 +937,26 @@ public:
std::cout<<GridLogMessage<< iProj <<std::endl;
std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
#endif
// ForceHermitian();
// AssertHermitian();
// ForceDiagonal();
}
void ForceHermitian(void) {
for(int d=0;d<4;d++){
int dd=d+1;
A[2*d] = adj(Cshift(A[2*d+1],dd,1));
CoarseMatrix Diff (Grid());
for(int p=0;p<geom.npoint;p++){
int dir = geom.directions[p];
int disp = geom.displacements[p];
if(disp==-1) {
// Find the opposite link
for(int pp=0;pp<geom.npoint;pp++){
int dirp = geom.directions[pp];
int dispp = geom.displacements[pp];
if ( (dirp==dir) && (dispp==1) ){
// Diff = adj(Cshift(A[p],dir,1)) - A[pp];
// std::cout << GridLogMessage<<" Replacing stencil leg "<<pp<<" with leg "<<p<< " diff "<<norm2(Diff) <<std::endl;
A[pp] = adj(Cshift(A[p],dir,1));
}
}
}
}
// A[8] = 0.5*(A[8] + adj(A[8]));
}
void AssertHermitian(void) {
CoarseMatrix AA (Grid());

View File

@ -47,6 +47,7 @@ public:
// Support for coarsening to a multigrid
virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base
virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base
virtual void OpDirAll (const Field &in, std::vector<Field> &out) = 0; // Abstract base
virtual void Op (const Field &in, Field &out) = 0; // Abstract base
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
@ -83,6 +84,9 @@ public:
void OpDir (const Field &in, Field &out,int dir,int disp) {
_Mat.Mdir(in,out,dir,disp);
}
void OpDirAll (const Field &in, std::vector<Field> &out){
_Mat.MdirAll(in,out);
};
void Op (const Field &in, Field &out){
_Mat.M(in,out);
}
@ -93,8 +97,7 @@ public:
_Mat.MdagM(in,out,n1,n2);
}
void HermOp(const Field &in, Field &out){
RealD n1,n2;
HermOpAndNorm(in,out,n1,n2);
_Mat.MdagM(in,out);
}
};
@ -116,6 +119,9 @@ public:
_Mat.Mdir(in,out,dir,disp);
assert(0);
}
void OpDirAll (const Field &in, std::vector<Field> &out){
assert(0);
};
void Op (const Field &in, Field &out){
_Mat.M(in,out);
assert(0);
@ -154,6 +160,9 @@ public:
void OpDir (const Field &in, Field &out,int dir,int disp) {
_Mat.Mdir(in,out,dir,disp);
}
void OpDirAll (const Field &in, std::vector<Field> &out){
_Mat.MdirAll(in,out);
};
void Op (const Field &in, Field &out){
_Mat.M(in,out);
}
@ -162,7 +171,6 @@ public:
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
_Mat.M(in,out);
ComplexD dot= innerProduct(in,out); n1=real(dot);
n2=norm2(out);
}
@ -171,6 +179,35 @@ public:
}
};
////////////////////////////////////////////////////////////////////
// Linear operator wrapper for a matrix that is not assumed Hermitian.
// Op / AdjOp forward to M / Mdag of the wrapped matrix; the Hermitian
// entry points are deliberately unusable and trip an assert.
////////////////////////////////////////////////////////////////////
template<class Matrix,class Field>
class NonHermitianLinearOperator : public LinearOperatorBase<Field> {
  Matrix &_Mat;   // wrapped matrix, held by reference

public:
  NonHermitianLinearOperator(Matrix &Mat): _Mat(Mat){};

  // Support for coarsening to a multigrid
  void OpDiag (const Field &in, Field &out)
  {
    _Mat.Mdiag(in,out);
  }
  void OpDir (const Field &in, Field &out,int dir,int disp)
  {
    _Mat.Mdir(in,out,dir,disp);
  }
  void OpDirAll (const Field &in, std::vector<Field> &out)
  {
    _Mat.MdirAll(in,out);
  }

  // Plain and adjoint application
  void Op (const Field &in, Field &out)
  {
    _Mat.M(in,out);
  }
  void AdjOp (const Field &in, Field &out)
  {
    _Mat.Mdag(in,out);
  }

  // Hermitian interface: not provided by this wrapper
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)
  {
    assert(0);
  }
  void HermOp(const Field &in, Field &out)
  {
    assert(0);
  }
};
//////////////////////////////////////////////////////////
// Even Odd Schur decomp operators; there are several
// ways to introduce the even odd checkerboarding
@ -208,6 +245,9 @@ public:
void OpDir (const Field &in, Field &out,int dir,int disp) {
assert(0);
}
void OpDirAll (const Field &in, std::vector<Field> &out){
assert(0);
};
};
template<class Matrix,class Field>
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
@ -296,7 +336,7 @@ public:
};
///////////////////////////////////////////////////////////////////////////////////////////////////
// Left handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta --> ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta
// Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta --> ( 1 - Moe Mee^-1 Meo ) Moo^-1 phi=eta ; psi = Moo^-1 phi
// Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta --> ( 1 - Moe Mee^-1 Meo Moo^-1) phi=eta ; psi = Moo^-1 phi
///////////////////////////////////////////////////////////////////////////////////////////////////
template<class Matrix,class Field> using SchurDiagOneRH = SchurDiagTwoOperator<Matrix,Field> ;
template<class Matrix,class Field> using SchurDiagOneLH = SchurDiagOneOperator<Matrix,Field> ;

View File

@ -45,8 +45,13 @@ public:
ni=M(in,tmp);
no=Mdag(tmp,out);
}
virtual void MdagM(const Field &in, Field &out) {
RealD ni, no;
MdagM(in,out,ni,no);
}
virtual void Mdiag (const Field &in, Field &out)=0;
virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0;
virtual void MdirAll (const Field &in, std::vector<Field> &out)=0;
};
/////////////////////////////////////////////////////////////////////////////////////////////
@ -56,12 +61,12 @@ template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrix
public:
virtual GridBase *RedBlackGrid(void)=0;
//////////////////////////////////////////////////////////////////////
// Query the even even properties to make algorithmic decisions
//////////////////////////////////////////////////////////////////////
virtual RealD Mass(void) { return 0.0; };
virtual int ConstEE(void) { return 1; }; // Disable assumptions unless overridden
virtual int isTrivialEE(void) { return 0; }; // by a derived class that knows better
//////////////////////////////////////////////////////////////////////
// Query the even even properties to make algorithmic decisions
//////////////////////////////////////////////////////////////////////
virtual RealD Mass(void) { return 0.0; };
virtual int ConstEE(void) { return 1; }; // Disable assumptions unless overridden
virtual int isTrivialEE(void) { return 0; }; // by a derived class that knows better
// half checkerboard operaions
virtual void Meooe (const Field &in, Field &out)=0;

View File

@ -94,6 +94,24 @@ public:
Coeffs.assign(0.,order);
Coeffs[order-1] = 1.;
};
// PB - more efficient low pass drops high modes above the low as 1/x uses all Chebyshev's.
// Similar kick effect below the threshold as Lanczos filter approach
// Set the interval [_lo,_hi] and order, then fill Coeffs[0..order-1] with
//   Coeffs[j] = cos( j*pi*(k+0.5)/order ) * 2/order ,  k = order-1 .
// The process exits with status -1 when _order is below 2.
void InitLowPass(RealD _lo,RealD _hi,int _order)
{
  lo    = _lo;
  hi    = _hi;
  order = _order;

  if ( order < 2 ) exit(-1);

  Coeffs.resize(order);

  // k is the same for every coefficient, so compute it once
  const RealD k = (order-1.0);
  for(int j=0;j<order;j++){
    const RealD s = std::cos( j*M_PI*(k+0.5)/order );
    Coeffs[j] = s * 2.0/order;
  }
}
void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD))
{
@ -234,20 +252,20 @@ public:
RealD xscale = 2.0/(hi-lo);
RealD mscale = -(hi+lo)/(hi-lo);
Linop.HermOp(T0,y);
T1=y*xscale+in*mscale;
axpby(T1,xscale,mscale,y,in);
// sum = .5 c[0] T0 + c[1] T1
out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
// out = ()*T0 + Coeffs[1]*T1;
axpby(out,0.5*Coeffs[0],Coeffs[1],T0,T1);
for(int n=2;n<order;n++){
Linop.HermOp(*Tn,y);
y=xscale*y+mscale*(*Tn);
*Tnp=2.0*y-(*Tnm);
out=out+Coeffs[n]* (*Tnp);
// y=xscale*y+mscale*(*Tn);
// *Tnp=2.0*y-(*Tnm);
// out=out+Coeffs[n]* (*Tnp);
axpby(y,xscale,mscale,y,(*Tn));
axpby(*Tnp,2.0,-1.0,y,(*Tnm));
axpy(out,Coeffs[n],*Tnp,out);
// Cycle pointers to avoid copies
Field *swizzle = Tnm;
Tnm =Tn;

View File

@ -0,0 +1,129 @@
#ifndef GRID_JACOBIPOLYNOMIAL_H
#define GRID_JACOBIPOLYNOMIAL_H
#include <Grid/algorithms/LinearOperator.h>
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////////////////////////////////
// Jacobi polynomial P_order^{(alpha,beta)} as an operator function.
//
// The argument (a scalar x in approx(), or the Hermitian operator in
// operator()) is first mapped affinely so that [lo,hi] -> [-1,1]:
//     y = ( x - (hi+lo)/2 ) / ( (hi-lo)/2 )
// and the polynomial is then built with the three-term recurrence
//     cnp P_n = ( cny y + cn1 ) P_{n-1} + cnm P_{n-2}
// starting from P_0 = 1 and P_1 = (alpha-beta)/2 + (alpha+beta+2)/2 * y.
//
// approx() and operator() evaluate the same recurrence and must stay in step.
////////////////////////////////////////////////////////////////////////////////
template<class Field>
class JacobiPolynomial : public OperatorFunction<Field> {
private:
  using OperatorFunction<Field>::operator();

  int   order;   // polynomial degree
  RealD hi;      // interval edge mapped to y=+1
  RealD lo;      // interval edge mapped to y=-1
  RealD alpha;   // Jacobi parameters
  RealD beta;

public:
  // Write "x f(x)" pairs over [lo,hi] for plotting
  void csv(std::ostream &out){
    csv(out,lo,hi);
  }
  // Write "x f(x)" pairs from just below llo up to hhi in steps of (hhi-llo)*1e-5
  void csv(std::ostream &out,RealD llo,RealD hhi){
    RealD diff = hhi-llo;
    RealD delta = diff*1.0e-5;
    for (RealD x=llo-delta; x<=hhi; x+=delta) {
      RealD f = approx(x);
      out<< x<<" "<<f <<std::endl;
    }
    return;
  }

  // Members are left unset; assign from a fully constructed object before use
  JacobiPolynomial(){};
  JacobiPolynomial(RealD _lo,RealD _hi,int _order,RealD _alpha, RealD _beta)
  {
    lo=_lo;
    hi=_hi;
    alpha=_alpha;
    beta=_beta;
    order=_order;
  };

  // Scalar evaluation of the polynomial at x.
  // Convenience for plotting the approximation.
  RealD approx(RealD x)
  {
    RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));

    RealD Tnm=1.0;                                        // P_0
    if ( order<=0 ) return Tnm;                           // degree zero: no recurrence step

    RealD Tn =(alpha-beta)*0.5+(alpha+beta+2.0)*0.5*y;    // P_1

    for(int n=2;n<=order;n++){
      RealD cnp = 2.0*n*(n+alpha+beta)*(2.0*n-2.0+alpha+beta);
      RealD cny = (2.0*n-2.0+alpha+beta)*(2.0*n-1.0+alpha+beta)*(2.0*n+alpha+beta);
      RealD cn1 = (2.0*n+alpha+beta-1.0)*(alpha*alpha-beta*beta);
      RealD cnm = - 2.0*(n+alpha-1.0)*(n+beta-1.0)*(2.0*n+alpha+beta);
      RealD Tnp= ( cny * y *Tn + cn1 * Tn + cnm * Tnm )/ cnp;
      Tnm=Tn;
      Tn =Tnp;
    }
    // Tn always holds the highest degree computed, including order==1
    return Tn;
  };

  // Implement the required interface: out = P_order( y(Linop) ) in
  void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {

    GridBase *grid=in.Grid();

    Field T0(grid);
    Field T1(grid);
    Field T2(grid);
    Field y(grid);

    Field *Tnm = &T0;
    Field *Tn  = &T1;
    Field *Tnp = &T2;

    // RealD T0=1.0;
    T0=in;
    if ( order<=0 ) {   // degree zero: the polynomial is the identity
      out=T0;
      return;
    }

    // RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
    //           = x * 2/(hi-lo) - (hi+lo)/(hi-lo)
    RealD xscale = 2.0/(hi-lo);
    RealD mscale = -(hi+lo)/(hi-lo);
    Linop.HermOp(T0,y);          // one operator application is enough here
    y=y*xscale+in*mscale;

    // RealD T1=(alpha-beta)*0.5+(alpha+beta+2.0)*0.5*y;
    RealD halfAmB  = (alpha-beta)*0.5;
    RealD halfApBp2= (alpha+beta+2.0)*0.5;
    T1 = halfAmB * in + halfApBp2*y;

    for(int n=2;n<=order;n++){

      Linop.HermOp(*Tn,y);
      y=xscale*y+mscale*(*Tn);

      RealD cnp = 2.0*n*(n+alpha+beta)*(2.0*n-2.0+alpha+beta);
      RealD cny = (2.0*n-2.0+alpha+beta)*(2.0*n-1.0+alpha+beta)*(2.0*n+alpha+beta);
      RealD cn1 = (2.0*n+alpha+beta-1.0)*(alpha*alpha-beta*beta);
      RealD cnm = - 2.0*(n+alpha-1.0)*(n+beta-1.0)*(2.0*n+alpha+beta);

      // Tnp= ( cny * y *Tn + cn1 * Tn + cnm * Tnm )/ cnp;
      // Each coefficient is divided by cnp exactly once, as in approx()
      cny=cny/cnp;
      cn1=cn1/cnp;
      cnm=cnm/cnp;

      *Tnp=cny*y + cn1 *(*Tn) + cnm * (*Tnm);

      // Cycle pointers to avoid copies
      Field *swizzle = Tnm;
      Tnm    =Tn;
      Tn     =Tnp;
      Tnp    =swizzle;
    }
    // After the cycle the newest polynomial is in *Tn; *Tnp is only scratch
    out=*Tn;
  }
};
NAMESPACE_END(Grid);
#endif

View File

@ -71,7 +71,6 @@ public:
// Initial residual computation & set up
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
Linop.HermOpAndNorm(psi, mmp, d, b);
@ -154,18 +153,18 @@ public:
RealD resnorm = std::sqrt(norm2(p));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl;
std::cout << GridLogMessage << "\tComputed residual " << std::sqrt(cp / ssq)<<std::endl;
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k
<< "\tComputed residual " << std::sqrt(cp / ssq)
<< "\tTrue residual " << true_residual
<< "\tTarget " << Tolerance << std::endl;
std::cout << GridLogMessage << "Time breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInner " << InnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
std::cout << GridLogIterative << "Time breakdown "<<std::endl;
std::cout << GridLogIterative << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogIterative << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogIterative << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
std::cout << GridLogIterative << "\tInner " << InnerTimer.Elapsed() <<std::endl;
std::cout << GridLogIterative << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
std::cout << GridLogIterative << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);

View File

@ -43,6 +43,11 @@ NAMESPACE_BEGIN(Grid);
template<class Field>
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k)
{
// If assume basis[j] are already orthonormal,
// can take all inner products in parallel saving 2x bandwidth
// Save 3x bandwidth on the second line of loop.
// perhaps 2.5x speed up.
// 2x overall in Multigrid Lanczos
for(int j=0; j<k; ++j){
auto ip = innerProduct(basis[j],w);
w = w - ip*basis[j];
@ -54,17 +59,19 @@ void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, i
{
typedef decltype(basis[0].View()) View;
auto tmp_v = basis[0].View();
std::vector<View> basis_v(basis.size(),tmp_v);
Vector<View> basis_v(basis.size(),tmp_v);
typedef typename Field::vector_object vobj;
GridBase* grid = basis[0].Grid();
for(int k=0;k<basis.size();k++){
basis_v[k] = basis[k].View();
}
#if 0
std::vector < vobj , commAllocator<vobj> > Bt(thread_max() * Nm); // Thread private
thread_region
{
std::vector < vobj , commAllocator<vobj> > B(Nm); // Thread private
vobj* B = Bt.data() + Nm * thread_num();
thread_for_in_region(ss, grid->oSites(),{
for(int j=j0; j<j1; ++j) B[j]=0.;
@ -78,24 +85,89 @@ void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, i
}
});
}
#else
int nrot = j1-j0;
uint64_t oSites =grid->oSites();
uint64_t siteBlock=(grid->oSites()+nrot-1)/nrot; // Maximum 1 additional vector overhead
// printf("BasisRotate %d %d nrot %d siteBlock %d\n",j0,j1,nrot,siteBlock);
Vector <vobj> Bt(siteBlock * nrot);
auto Bp=&Bt[0];
// GPU readable copy of Eigen matrix
Vector<double> Qt_jv(Nm*Nm);
double *Qt_p = & Qt_jv[0];
for(int k=0;k<Nm;++k){
for(int j=0;j<Nm;++j){
Qt_p[j*Nm+k]=Qt(j,k);
}
}
// Block the loop to keep storage footprint down
vobj zz=Zero();
for(uint64_t s=0;s<oSites;s+=siteBlock){
// remaining work in this block
int ssites=MIN(siteBlock,oSites-s);
// zero out the accumulators
accelerator_for(ss,siteBlock*nrot,vobj::Nsimd(),{
auto z=coalescedRead(zz);
coalescedWrite(Bp[ss],z);
});
accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
int j =sj%nrot;
int jj =j0+j;
int ss =sj/nrot;
int sss=ss+s;
for(int k=k0; k<k1; ++k){
auto tmp = coalescedRead(Bp[ss*nrot+j]);
coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_v[k][sss]));
}
});
accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
int j =sj%nrot;
int jj =j0+j;
int ss =sj/nrot;
int sss=ss+s;
coalescedWrite(basis_v[jj][sss],coalescedRead(Bp[ss*nrot+j]));
});
}
#endif
}
// Extract a single rotated vector
template<class Field>
void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)
{
typedef decltype(basis[0].View()) View;
typedef typename Field::vector_object vobj;
GridBase* grid = basis[0].Grid();
result.Checkerboard() = basis[0].Checkerboard();
auto result_v=result.View();
thread_for(ss, grid->oSites(),{
vobj B = Zero();
Vector<View> basis_v(basis.size(),result_v);
for(int k=0;k<basis.size();k++){
basis_v[k] = basis[k].View();
}
vobj zz=Zero();
Vector<double> Qt_jv(Nm);
double * Qt_j = & Qt_jv[0];
for(int k=0;k<Nm;++k) Qt_j[k]=Qt(j,k);
accelerator_for(ss, grid->oSites(),vobj::Nsimd(),{
auto B=coalescedRead(zz);
for(int k=k0; k<k1; ++k){
auto basis_k = basis[k].View();
B +=Qt(j,k) * basis_k[ss];
B +=Qt_j[k] * coalescedRead(basis_v[k][ss]);
}
result_v[ss] = B;
coalescedWrite(result_v[ss], B);
});
}
@ -279,7 +351,7 @@ public:
RealD _eresid, // resid in lmdue deficit
int _MaxIter, // Max iterations
RealD _betastp=0.0, // if beta(k) < betastp: converged
int _MinRestart=1, int _orth_period = 1,
int _MinRestart=0, int _orth_period = 1,
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(Tester),
Nstop(_Nstop) , Nk(_Nk), Nm(_Nm),
@ -295,7 +367,7 @@ public:
RealD _eresid, // resid in lmdue deficit
int _MaxIter, // Max iterations
RealD _betastp=0.0, // if beta(k) < betastp: converged
int _MinRestart=1, int _orth_period = 1,
int _MinRestart=0, int _orth_period = 1,
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(SimpleTester),
Nstop(_Nstop) , Nk(_Nk), Nm(_Nm),
@ -344,7 +416,7 @@ until convergence
GridBase *grid = src.Grid();
assert(grid == evec[0].Grid());
GridLogIRL.TimingMode(1);
// GridLogIRL.TimingMode(1);
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl;
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
@ -369,14 +441,17 @@ until convergence
{
auto src_n = src;
auto tmp = src;
std::cout << GridLogIRL << " IRL source norm " << norm2(src) << std::endl;
const int _MAX_ITER_IRL_MEVAPP_ = 50;
for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) {
normalise(src_n);
_HermOp(src_n,tmp);
// std::cout << GridLogMessage<< tmp<<std::endl; exit(0);
// std::cout << GridLogIRL << " _HermOp " << norm2(tmp) << std::endl;
RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
RealD vden = norm2(src_n);
RealD na = vnum/vden;
if (fabs(evalMaxApprox/na - 1.0) < 0.05)
if (fabs(evalMaxApprox/na - 1.0) < 0.0001)
i=_MAX_ITER_IRL_MEVAPP_;
evalMaxApprox = na;
std::cout << GridLogIRL << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
@ -574,11 +649,11 @@ until convergence
/* Saad PP. 195
1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0
2. For k = 1,2,...,m Do:
3. wk:=Avkβkv_{k1}
4. αk:=(wk,vk) //
5. wk:=wkαkvk // wk orthog vk
6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
7. vk+1 := wk/βk+1
3. wk:=Avk - b_k v_{k-1}
4. ak:=(wk,vk) //
5. wk:=wk-akvk // wk orthog vk
6. bk+1 := ||wk||_2. If b_k+1 = 0 then Stop
7. vk+1 := wk/b_k+1
8. EndDo
*/
void step(std::vector<RealD>& lmd,
@ -586,6 +661,7 @@ until convergence
std::vector<Field>& evec,
Field& w,int Nm,int k)
{
std::cout<<GridLogIRL << "Lanczos step " <<k<<std::endl;
const RealD tiny = 1.0e-20;
assert( k< Nm );
@ -597,20 +673,20 @@ until convergence
if(k>0) w -= lme[k-1] * evec[k-1];
ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk)
ComplexD zalph = innerProduct(evec_k,w);
RealD alph = real(zalph);
w = w - alph * evec_k;// 5. wk:=wkαkvk
w = w - alph * evec_k;
RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
// 7. vk+1 := wk/βk+1
RealD beta = normalise(w);
lmd[k] = alph;
lme[k] = beta;
if (k>0 && k % orth_period == 0) {
if ( (k>0) && ( (k % orth_period) == 0 )) {
std::cout<<GridLogIRL << "Orthogonalising " <<k<<std::endl;
orthogonalize(w,evec,k); // orthonormalise
std::cout<<GridLogIRL << "Orthogonalised " <<std::endl;
std::cout<<GridLogIRL << "Orthogonalised " <<k<<std::endl;
}
if(k < Nm-1) evec[k+1] = w;
@ -618,6 +694,8 @@ until convergence
std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
if ( beta < tiny )
std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl;
std::cout<<GridLogIRL << "Lanczos step complete " <<k<<std::endl;
}
void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme,

View File

@ -33,26 +33,78 @@ NAMESPACE_BEGIN(Grid);
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form an NE solver calling a Herm solver
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class NormalEquations : public OperatorFunction<Field>{
template<class Field> class NormalEquations {
private:
SparseMatrixBase<Field> & _Matrix;
OperatorFunction<Field> & _HermitianSolver;
LinearFunction<Field> & _Guess;
public:
/////////////////////////////////////////////////////
// Wrap the usual normal equations trick
/////////////////////////////////////////////////////
NormalEquations(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver)
: _Matrix(Matrix), _HermitianSolver(HermitianSolver) {};
NormalEquations(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver,
LinearFunction<Field> &Guess)
: _Matrix(Matrix), _HermitianSolver(HermitianSolver), _Guess(Guess) {};
void operator() (const Field &in, Field &out){
Field src(in.Grid());
Field tmp(in.Grid());
MdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(_Matrix);
_Matrix.Mdag(in,src);
_HermitianSolver(src,out); // Mdag M out = Mdag in
_Guess(src,out);
_HermitianSolver(MdagMOp,src,out); // Mdag M out = Mdag in
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Bundle a linear operator, a Hermitian solver and a guess functor into a
// single callable taking (in,out).
// The operator is handed to the solver unchanged, so this solves
//   Matrix out = in
// NOTE(review): the name suggests the operator is expected to be Hermitian
// positive definite; nothing here checks that - confirm against callers.
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class HPDSolver {
private:
  LinearOperatorBase<Field> & _Matrix;
  OperatorFunction<Field>   & _HermitianSolver;
  LinearFunction<Field>     & _Guess;

public:
  // Only references are stored; the three objects must outlive this wrapper.
  HPDSolver(LinearOperatorBase<Field> &Matrix,
            OperatorFunction<Field> &HermitianSolver,
            LinearFunction<Field> &Guess)
    : _Matrix(Matrix),
      _HermitianSolver(HermitianSolver),
      _Guess(Guess)
  {}

  void operator() (const Field &in, Field &out)
  {
    // The guess functor is called on (in,out) first, then the solver is
    // called on the same out.
    _Guess(in,out);
    _HermitianSolver(_Matrix,in,out); // Matrix out = in
  }
};
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Bundle a sparse matrix, a Hermitian solver and a guess functor into a
// single callable taking (in,out).
// The matrix is wrapped in an MdagMLinearOperator and `in` is passed to the
// solver as the source without being multiplied by Mdag first, i.e. the solver
// is asked for
//   MdagM out = in
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class MdagMSolver {
private:
  SparseMatrixBase<Field> & _Matrix;
  OperatorFunction<Field> & _HermitianSolver;
  LinearFunction<Field>   & _Guess;

public:
  // Only references are stored; the three objects must outlive this wrapper.
  MdagMSolver(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver,
              LinearFunction<Field> &Guess)
    : _Matrix(Matrix),
      _HermitianSolver(HermitianSolver),
      _Guess(Guess)
  {}

  void operator() (const Field &in, Field &out)
  {
    // The operator wrapper is rebuilt on every call.
    MdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(_Matrix);

    // The guess functor is called on (in,out) first, then the solver is
    // called on the same out.
    _Guess(in,out);
    _HermitianSolver(MdagMOp,in,out); // MdagM out = in
  }
};

View File

@ -30,12 +30,12 @@ template<class Field> class PowerMethod
RealD vden = norm2(src_n);
RealD na = vnum/vden;
if ( (fabs(evalMaxApprox/na - 1.0) < 0.01) || (i==_MAX_ITER_EST_-1) ) {
if ( (fabs(evalMaxApprox/na - 1.0) < 0.001) || (i==_MAX_ITER_EST_-1) ) {
evalMaxApprox = na;
std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
return evalMaxApprox;
}
evalMaxApprox = na;
std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
src_n = tmp;
}
assert(0);

View File

@ -38,10 +38,11 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
///////////////////////////////////////////////////////////////////////////////////////////////////////
NAMESPACE_BEGIN(Grid);
#define GCRLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level<<" "
template<class Field>
class PrecGeneralisedConjugateResidual : public OperatorFunction<Field> {
class PrecGeneralisedConjugateResidual : public LinearFunction<Field> {
public:
using OperatorFunction<Field>::operator();
RealD Tolerance;
Integer MaxIterations;
@ -49,23 +50,29 @@ public:
int mmax;
int nstep;
int steps;
int level;
GridStopWatch PrecTimer;
GridStopWatch MatTimer;
GridStopWatch LinalgTimer;
LinearFunction<Field> &Preconditioner;
LinearFunction<Field> &Preconditioner;
LinearOperatorBase<Field> &Linop;
PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec,int _mmax,int _nstep) :
void Level(int lv) { level=lv; };
PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearOperatorBase<Field> &_Linop,LinearFunction<Field> &Prec,int _mmax,int _nstep) :
Tolerance(tol),
MaxIterations(maxit),
Linop(_Linop),
Preconditioner(Prec),
mmax(_mmax),
nstep(_nstep)
{
level=1;
verbose=1;
};
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
void operator() (const Field &src, Field &psi){
psi=Zero();
RealD cp, ssq,rsq;
@ -84,9 +91,9 @@ public:
steps=0;
for(int k=0;k<MaxIterations;k++){
cp=GCRnStep(Linop,src,psi,rsq);
cp=GCRnStep(src,psi,rsq);
std::cout<<GridLogMessage<<"VPGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<std::endl;
GCRLogLevel <<"PGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<" target "<<rsq <<std::endl;
if(cp<rsq) {
@ -95,24 +102,26 @@ public:
Linop.HermOp(psi,r);
axpy(r,-1.0,src,r);
RealD tr = norm2(r);
std::cout<<GridLogMessage<<"PrecGeneralisedConjugateResidual: Converged on iteration " <<steps
GCRLogLevel<<"PGCR: Converged on iteration " <<steps
<< " computed residual "<<sqrt(cp/ssq)
<< " true residual " <<sqrt(tr/ssq)
<< " target " <<Tolerance <<std::endl;
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Total "<< SolverTimer.Elapsed() <<std::endl;
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Precon "<< PrecTimer.Elapsed() <<std::endl;
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Matrix "<< MatTimer.Elapsed() <<std::endl;
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Linalg "<< LinalgTimer.Elapsed() <<std::endl;
GCRLogLevel<<"PGCR Time elapsed: Total "<< SolverTimer.Elapsed() <<std::endl;
/*
GCRLogLevel<<"PGCR Time elapsed: Precon "<< PrecTimer.Elapsed() <<std::endl;
GCRLogLevel<<"PGCR Time elapsed: Matrix "<< MatTimer.Elapsed() <<std::endl;
GCRLogLevel<<"PGCR Time elapsed: Linalg "<< LinalgTimer.Elapsed() <<std::endl;
*/
return;
}
}
std::cout<<GridLogMessage<<"Variable Preconditioned GCR did not converge"<<std::endl;
assert(0);
GCRLogLevel<<"Variable Preconditioned GCR did not converge"<<std::endl;
// assert(0);
}
RealD GCRnStep(LinearOperatorBase<Field> &Linop,const Field &src, Field &psi,RealD rsq){
RealD GCRnStep(const Field &src, Field &psi,RealD rsq){
RealD cp;
RealD a, b;
@ -134,6 +143,7 @@ public:
std::vector<Field> p(mmax,grid);
std::vector<RealD> qq(mmax);
GCRLogLevel<< "PGCR nStep("<<nstep<<")"<<std::endl;
//////////////////////////////////
// initial guess x0 is taken as nonzero.
@ -143,38 +153,26 @@ public:
Linop.HermOpAndNorm(psi,Az,zAz,zAAz);
MatTimer.Stop();
LinalgTimer.Start();
r=src-Az;
LinalgTimer.Stop();
GCRLogLevel<< "PGCR true residual r = src - A psi "<<norm2(r) <<std::endl;
/////////////////////
// p = Prec(r)
/////////////////////
PrecTimer.Start();
Preconditioner(r,z);
PrecTimer.Stop();
MatTimer.Start();
Linop.HermOp(z,tmp);
MatTimer.Stop();
LinalgTimer.Start();
ttmp=tmp;
tmp=tmp-r;
LinalgTimer.Stop();
/*
std::cout<<GridLogMessage<<r<<std::endl;
std::cout<<GridLogMessage<<z<<std::endl;
std::cout<<GridLogMessage<<ttmp<<std::endl;
std::cout<<GridLogMessage<<tmp<<std::endl;
*/
MatTimer.Start();
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
MatTimer.Stop();
LinalgTimer.Start();
//p[0],q[0],qq[0]
p[0]= z;
q[0]= Az;
@ -200,11 +198,12 @@ public:
cp = axpy_norm(r,-a,q[peri_k],r);
LinalgTimer.Stop();
GCRLogLevel<< "PGCR step["<<steps<<"] resid " << cp << " target " <<rsq<<std::endl;
if((k==nstep-1)||(cp<rsq)){
return cp;
}
std::cout<<GridLogMessage<< " VPGCR_step["<<steps<<"] resid " <<sqrt(cp/rsq)<<std::endl;
PrecTimer.Start();
Preconditioner(r,z);// solve Az = r
@ -212,12 +211,9 @@ public:
MatTimer.Start();
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
Linop.HermOp(z,tmp);
MatTimer.Stop();
LinalgTimer.Start();
tmp=tmp-r;
std::cout<<GridLogMessage<< " Preconditioner resid " <<sqrt(norm2(tmp)/norm2(r))<<std::endl;
q[peri_kp]=Az;
p[peri_kp]=z;

View File

@ -0,0 +1,371 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/QuasiMinimalResidual.h
Copyright (C) 2019
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
// Real part of innerProduct(l, tmp), where tmp is produced from r by G5R5(tmp,r).
// NOTE(review): G5R5 is presumably gamma5 combined with a reflection of the
// fifth dimension - confirm against its definition.
// The full complex inner product is printed to std::cout on every call.
template<class Field>
RealD innerG5ProductReal(Field &l, Field &r)
{
  Field tmp(l.Grid());
  // G5R5 is used in place of an explicit gamma5 multiplication (tmp = G5*r),
  // so no Gamma object is needed here.
  G5R5(tmp,r);
  ComplexD ip =innerProduct(l,tmp);
  std::cout << "innerProductRealG5R5 "<<ip<<std::endl;
  return ip.real();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Quasi minimal residual solver for a general operator, using both LinOp.Op
// and LinOp.AdjOp (two-sided recurrence).
//
//   Tolerance         : convergence threshold compared against norm2(r)/norm2(b)
//   MaxIterations     : iteration cap
//   ErrorOnNoConverge : when true (default) running out of iterations trips an assert;
//                       when false the solver returns with the last iterate in x
//   IterationCount    : number of iterations performed by the most recent solve
//
// Two implementations are kept; the preprocessor switch below selects the first.
//
// NOTE(review): in the active implementation rho and xi are set from norm2()
// directly, whereas the disabled implementation takes sqrt(norm2()) for rho and
// compares against Tolerance*Tolerance. If norm2 returns the squared norm the
// active branch is not normalising v,w to unit length and is comparing a
// squared ratio with Tolerance - confirm which is intended before relying on
// this solver.
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field>
class QuasiMinimalResidual : public OperatorFunction<Field> {
public:
  using OperatorFunction<Field>::operator();

  bool ErrorOnNoConverge;
  RealD Tolerance;
  Integer MaxIterations;
  Integer IterationCount;

  // Initialisers are listed in declaration order, which is the order in which
  // the members are actually constructed.
  QuasiMinimalResidual(RealD tol,
                       Integer maxit,
                       bool err_on_no_conv = true)
    : ErrorOnNoConverge(err_on_no_conv)
    , Tolerance(tol)
    , MaxIterations(maxit)
    , IterationCount(0)
  {};

#if 1
  // Solve LinOp x = b starting from the incoming x.
  void operator()(LinearOperatorBase<Field> &LinOp, const Field &b, Field &x)
  {
    RealD resid;

    IterationCount=0;

    RealD rho, rho_1, xi, gamma, gamma_1, theta, theta_1;
    RealD eta, delta, ep, beta;

    GridBase *Grid = b.Grid();
    Field r(Grid), d(Grid), s(Grid);
    Field v(Grid), w(Grid), y(Grid), z(Grid);
    Field v_tld(Grid), w_tld(Grid), y_tld(Grid), z_tld(Grid);
    Field p(Grid), q(Grid), p_tld(Grid);

    // Held in RealD like every other scalar in this routine.
    RealD normb = norm2(b);

    LinOp.Op(x,r); r = b - r;

    assert(normb> 0.0);

    resid = norm2(r)/normb;
    if (resid <= Tolerance) {
      return;
    }

    v_tld = r;
    y = v_tld;
    rho = norm2(y);

    // Take Gamma5 conjugate
    //    Gamma G5(Gamma::Algebra::Gamma5);
    //    G5R5(w_tld,r);
    //    w_tld = G5* v_tld;
    w_tld=v_tld;
    z = w_tld;
    xi = norm2(z);

    gamma = 1.0;
    eta = -1.0;
    theta = 0.0;

    for (int i = 1; i <= MaxIterations; i++) {

      IterationCount = i;

      // Breakdown tests
      assert( rho != 0.0);
      assert( xi  != 0.0);

      v = (1. / rho) * v_tld;
      y = (1. / rho) * y;

      w = (1. / xi) * w_tld;
      z = (1. / xi) * z;

      ComplexD Zdelta = innerProduct(z, y); // Complex?
      std::cout << "Zdelta "<<Zdelta<<std::endl;
      delta = Zdelta.real();

      y_tld = y;
      z_tld = z;

      if (i > 1) {
        // ep here is the value computed in the previous iteration
        p = y_tld - (xi  * delta / ep) * p;
        q = z_tld - (rho * delta / ep) * q;
      } else {
        p = y_tld;
        q = z_tld;
      }

      LinOp.Op(p,p_tld);      //      p_tld = A * p;
      ComplexD Zep = innerProduct(q, p_tld);
      ep=Zep.real();
      std::cout << "Zep "<<Zep <<std::endl;
      // Complex Audit
      // std::abs so the floating point overload is always the one selected;
      // an unqualified abs can resolve to the integer version and truncate.
      assert(std::abs(ep)>0);

      beta = ep / delta;
      assert(std::abs(beta)>0);

      v_tld = p_tld - beta * v;
      y = v_tld;

      rho_1 = rho;
      rho = norm2(y);
      LinOp.AdjOp(q,w_tld);
      w_tld = w_tld - beta * w;
      z = w_tld;

      xi = norm2(z);

      gamma_1 = gamma;
      theta_1 = theta;

      theta = rho / (gamma_1 * beta);
      gamma = 1.0 / sqrt(1.0 + theta * theta);
      std::cout << "theta "<<theta<<std::endl;
      std::cout << "gamma "<<gamma<<std::endl;

      assert(std::abs(gamma)> 0.0);

      eta = -eta * rho_1 * gamma* gamma / (beta * gamma_1 * gamma_1);

      if (i > 1) {
        d = eta * p     + (theta_1 * theta_1 * gamma * gamma) * d;
        s = eta * p_tld + (theta_1 * theta_1 * gamma * gamma) * s;
      } else {
        d = eta * p;
        s = eta * p_tld;
      }

      x =x+d;                            // update approximation vector
      r =r-s;                            // compute residual

      if ((resid = norm2(r) / normb) <= Tolerance) {
        return;
      }
      std::cout << "Iteration "<<i<<" resid " << resid<<std::endl;
    }

    // Iteration cap reached without meeting the tolerance.
    // Only treated as fatal when the caller asked for that.
    if (ErrorOnNoConverge) assert(0);
    return;                            // no convergence
  }
#else
  // QMRg5 SMP thesis
  void operator()(LinearOperatorBase<Field> &LinOp, const Field &b, Field &x)
  {
    // Real scalars
    GridBase *grid = b.Grid();

    Field r(grid);
    Field p_m(grid), p_m_minus_1(grid), p_m_minus_2(grid);
    Field v_m(grid), v_m_minus_1(grid), v_m_plus_1(grid);
    Field tmp(grid);

    RealD w;
    RealD z1, z2;
    RealD delta_m, delta_m_minus_1;
    RealD c_m_plus_1, c_m, c_m_minus_1;
    RealD s_m_plus_1, s_m, s_m_minus_1;
    RealD alpha, beta, gamma, epsilon;
    RealD mu, nu, rho, theta, xi, chi;
    RealD mod2r, mod2b;
    RealD tau2, target2;

    mod2b=norm2(b);

    /////////////////////////
    // Initial residual
    /////////////////////////
    LinOp.Op(x,tmp);
    r = b - tmp;

    /////////////////////////
    // \mu = \rho = |r_0|
    /////////////////////////
    mod2r = norm2(r);
    rho = sqrt( mod2r);
    mu=rho;

    std::cout << "QuasiMinimalResidual rho "<< rho<<std::endl;
    /////////////////////////
    // Zero negative history
    /////////////////////////
    v_m_plus_1  = Zero();
    v_m_minus_1 = Zero();
    p_m_minus_1 = Zero();
    p_m_minus_2 = Zero();

    // v0
    v_m = (1.0/rho)*r;

    /////////////////////////
    // Initial coeffs
    /////////////////////////
    delta_m_minus_1 = 1.0;
    c_m_minus_1     = 1.0;
    c_m             = 1.0;
    s_m_minus_1     = 0.0;
    s_m             = 0.0;

    /////////////////////////
    // Set up convergence check
    /////////////////////////
    tau2    = mod2r;
    target2 = mod2b * Tolerance*Tolerance;

    for(int iter = 0 ; iter < MaxIterations; iter++){

      /////////////////////////
      // \delta_m = (v_m, \gamma_5 v_m)
      /////////////////////////
      delta_m = innerG5ProductReal(v_m,v_m);
      std::cout << "QuasiMinimalResidual delta_m "<< delta_m<<std::endl;

      /////////////////////////
      // tmp = A v_m
      /////////////////////////
      LinOp.Op(v_m,tmp);

      /////////////////////////
      // \alpha = (v_m, \gamma_5 temp) / \delta_m
      /////////////////////////
      alpha = innerG5ProductReal(v_m,tmp);
      alpha = alpha/delta_m ;
      std::cout << "QuasiMinimalResidual alpha "<< alpha<<std::endl;

      /////////////////////////
      // \beta = \rho \delta_m / \delta_{m-1}
      /////////////////////////
      beta = rho * delta_m / delta_m_minus_1;
      std::cout << "QuasiMinimalResidual beta "<< beta<<std::endl;

      /////////////////////////
      // \tilde{v}_{m+1} = temp - \alpha v_m - \beta v_{m-1}
      /////////////////////////
      v_m_plus_1 = tmp - alpha*v_m - beta*v_m_minus_1;

      ///////////////////////////////
      // \rho = || \tilde{v}_{m+1} ||
      ///////////////////////////////
      rho = sqrt( norm2(v_m_plus_1) );
      std::cout << "QuasiMinimalResidual rho "<< rho<<std::endl;

      ///////////////////////////////
      // v_{m+1} = \tilde{v}_{m+1}
      ///////////////////////////////
      v_m_plus_1 = (1.0 / rho) * v_m_plus_1;

      ////////////////////////////////
      // QMR recurrence coefficients.
      ////////////////////////////////
      theta      = s_m_minus_1 * beta;
      gamma      = c_m_minus_1 * beta;
      epsilon    =  c_m * gamma + s_m * alpha;
      xi         = -s_m * gamma + c_m * alpha;
      nu         = sqrt( xi*xi + rho*rho );
      c_m_plus_1 = fabs(xi) / nu;
      if ( xi == 0.0 ) {
        s_m_plus_1 = 1.0;
      } else {
        s_m_plus_1 = c_m_plus_1 * rho / xi;
      }
      chi = c_m_plus_1 * xi + s_m_plus_1 * rho;

      std::cout << "QuasiMinimalResidual coeffs "<< theta <<" "<<gamma<<" "<< epsilon<<" "<< xi<<" "<< nu<<std::endl;
      std::cout << "QuasiMinimalResidual coeffs "<< chi <<std::endl;

      ////////////////////////////////
      //p_m=(v_m - \epsilon p_{m-1} - \theta p_{m-2}) / \chi
      ////////////////////////////////
      p_m = (1.0/chi) * v_m - (epsilon/chi) * p_m_minus_1 - (theta/chi) * p_m_minus_2;

      ////////////////////////////////////////////////////////////////
      // \psi = \psi + c_{m+1} \mu p_m
      ////////////////////////////////////////////////////////////////
      x = x + ( c_m_plus_1 * mu ) * p_m;

      ////////////////////////////////////////
      //
      ////////////////////////////////////////
      mu              = -s_m_plus_1 * mu;
      delta_m_minus_1 = delta_m;
      c_m_minus_1     = c_m;
      c_m             = c_m_plus_1;
      s_m_minus_1     = s_m;
      s_m             = s_m_plus_1;

      ////////////////////////////////////
      // Could use pointer swizzle games.
      ////////////////////////////////////
      v_m_minus_1 = v_m;
      v_m         = v_m_plus_1;
      p_m_minus_2 = p_m_minus_1;
      p_m_minus_1 = p_m;

      /////////////////////////////////////
      // Convergence checks
      /////////////////////////////////////
      z1 = RealD(iter+1.0);
      z2 = z1 + 1.0;
      tau2 = tau2 *( z2 / z1 ) * s_m * s_m;
      std::cout << " QuasiMinimumResidual iteration "<< iter<<std::endl;
      std::cout << " QuasiMinimumResidual tau bound "<< tau2<<std::endl;

      // Compute true residual
      mod2r = tau2;
      if ( 1 || (tau2 < (100.0 * target2)) ) {
        LinOp.Op(x,tmp);
        r = b - tmp;
        mod2r = norm2(r);
        std::cout << " QuasiMinimumResidual true residual is "<< mod2r<<std::endl;
      }

      if ( mod2r < target2 ) {

        std::cout << " QuasiMinimumResidual has converged"<<std::endl;
        return;
      }
    }
  }
#endif
};
NAMESPACE_END(Grid);

View File

@ -6,6 +6,12 @@ NAMESPACE_BEGIN(Grid);
MemoryStats *MemoryProfiler::stats = nullptr;
bool MemoryProfiler::debug = false;
#ifdef GRID_NVCC
#define SMALL_LIMIT (0)
#else
#define SMALL_LIMIT (4096)
#endif
#ifdef POINTER_CACHE
int PointerCache::victim;
@ -13,7 +19,7 @@ PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
void *PointerCache::Insert(void *ptr,size_t bytes) {
if (bytes < 4096 ) return ptr;
if (bytes < SMALL_LIMIT ) return ptr;
#ifdef GRID_OMP
assert(omp_in_parallel()==0);
@ -50,7 +56,7 @@ void *PointerCache::Insert(void *ptr,size_t bytes) {
void *PointerCache::Lookup(size_t bytes) {
if (bytes < 4096 ) return NULL;
if (bytes < SMALL_LIMIT ) return NULL;
#ifdef GRID_OMP
assert(omp_in_parallel()==0);

View File

@ -49,8 +49,13 @@ NAMESPACE_BEGIN(Grid);
#ifdef POINTER_CACHE
class PointerCache {
private:
/*Pinning pages is costly*/
/*Could maintain separate large and small allocation caches*/
#ifdef GRID_NVCC
static const int Ncache=128;
#else
static const int Ncache=8;
#endif
static int victim;
typedef struct {
@ -63,7 +68,6 @@ private:
public:
static void *Insert(void *ptr,size_t bytes) ;
static void *Lookup(size_t bytes) ;
@ -170,13 +174,14 @@ public:
// Unified (managed) memory
////////////////////////////////////
if ( ptr == (_Tp *) NULL ) {
// printf(" alignedAllocater cache miss %ld bytes ",bytes); BACKTRACEFP(stdout);
auto err = cudaMallocManaged((void **)&ptr,bytes);
if( err != cudaSuccess ) {
ptr = (_Tp *) NULL;
std::cerr << " cudaMallocManaged failed for " << bytes<<" bytes " <<cudaGetErrorString(err)<< std::endl;
assert(0);
}
}
}
assert( ptr != (_Tp *)NULL);
#else
//////////////////////////////////////////////////////////////////////////////////////////

View File

@ -47,20 +47,19 @@ public:
// Give Lattice access
template<class object> friend class Lattice;
GridBase(const Coordinate & processor_grid) : CartesianCommunicator(processor_grid) {};
GridBase(const Coordinate & processor_grid) : CartesianCommunicator(processor_grid) { LocallyPeriodic=0;};
GridBase(const Coordinate & processor_grid,
const CartesianCommunicator &parent,
int &split_rank)
: CartesianCommunicator(processor_grid,parent,split_rank) {};
: CartesianCommunicator(processor_grid,parent,split_rank) {LocallyPeriodic=0;};
GridBase(const Coordinate & processor_grid,
const CartesianCommunicator &parent)
: CartesianCommunicator(processor_grid,parent,dummy) {};
: CartesianCommunicator(processor_grid,parent,dummy) {LocallyPeriodic=0;};
virtual ~GridBase() = default;
// Physics Grid information.
Coordinate _simd_layout;// Which dimensions get relayed out over simd lanes.
Coordinate _fdimensions;// (full) Global dimensions of array prior to cb removal
@ -80,7 +79,8 @@ public:
Coordinate _lstart; // local start of array in gcoors _processor_coor[d]*_ldimensions[d]
Coordinate _lend ; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1
bool _isCheckerBoarded;
bool _isCheckerBoarded;
int LocallyPeriodic;
public:

View File

@ -41,9 +41,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <sys/shm.h>
#include <sys/mman.h>
#include <zlib.h>
#ifdef HAVE_NUMAIF_H
#include <numaif.h>
#endif
NAMESPACE_BEGIN(Grid);
@ -99,6 +96,7 @@ public:
static void OptimalCommunicator (const Coordinate &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian
static void OptimalCommunicatorHypercube (const Coordinate &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian
static void OptimalCommunicatorSharedMemory(const Coordinate &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian
static void GetShmDims(const Coordinate &WorldDims,Coordinate &ShmDims);
///////////////////////////////////////////////////
// Provide shared memory facilities off comm world
///////////////////////////////////////////////////

View File

@ -155,6 +155,35 @@ void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_M
if(nscan==3 && HPEhypercube ) OptimalCommunicatorHypercube(processors,optimal_comm);
else OptimalCommunicatorSharedMemory(processors,optimal_comm);
}
// Returns 1 when a divides b exactly (b is an integer multiple of a), else 0.
// a must be non-zero.
static inline int divides(int a,int b)
{
  const int remainder = b % a;
  return remainder == 0;
}
// Choose ShmDims, a per-dimension blocking whose product equals WorldShmSize,
// by handing out prime factors round-robin over the dimensions of WorldDims.
//
//   WorldDims : processor grid extents (input)
//   ShmDims   : resized to WorldDims.size(), reset to all ones, then filled in (output)
//
// A factor is given to a dimension only if it divides both what is left of
// that dimension (WorldDims[dim]/ShmDims[dim]) and what is left of
// WorldShmSize (WorldShmSize/AutoShmSize).
//
// If WorldShmSize cannot be assembled this way (it has a prime factor other
// than 2,3,5, or the grid dimensions cannot absorb its factors) no further
// progress is possible; this is reported on std::cerr and asserted on rather
// than looping forever. With asserts compiled out the function returns with
// ShmDims only partially filled.
void GlobalSharedMemory::GetShmDims(const Coordinate &WorldDims,Coordinate &ShmDims)
{
  ////////////////////////////////////////////////////////////////
  // Powers of 2,3,5 only in prime decomposition for now
  ////////////////////////////////////////////////////////////////
  int ndimension = WorldDims.size();
  ShmDims=Coordinate(ndimension,1);

  std::vector<int> primes({2,3,5});

  int dim = 0;
  int AutoShmSize = 1;
  int stalled = 0; // consecutive dimensions visited without placing a factor
  while(AutoShmSize != WorldShmSize) {
    bool placed = false;
    for(int p=0;p<primes.size();p++) {
      int prime=primes[p];
      if ( divides(prime,WorldDims[dim]/ShmDims[dim])
        && divides(prime,WorldShmSize/AutoShmSize)  ) {
        AutoShmSize*=prime;
        ShmDims[dim]*=prime;
        placed = true;
        break;
      }
    }
    if ( placed ) {
      stalled = 0;
    } else {
      stalled++;
      // Nothing changes between visits unless a factor is placed, so a full
      // sweep over every dimension without placing one can never succeed later.
      if ( stalled >= ndimension ) {
        std::cerr << "GlobalSharedMemory::GetShmDims: unable to decompose " << WorldShmSize
                  << " ranks per node over the processor grid using prime factors 2,3,5" << std::endl;
        assert(0);
        return;
      }
    }
    dim=(dim+1) %ndimension;
  }
}
void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processors,Grid_MPI_Comm & optimal_comm)
{
////////////////////////////////////////////////////////////////
@ -221,17 +250,13 @@ void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processo
// in a maximally symmetrical way
////////////////////////////////////////////////////////////////
int ndimension = processors.size();
std::vector<int> processor_coor(ndimension);
std::vector<int> WorldDims = processors.toVector();
std::vector<int> ShmDims (ndimension,1); std::vector<int> NodeDims (ndimension);
std::vector<int> ShmCoor (ndimension); std::vector<int> NodeCoor (ndimension); std::vector<int> WorldCoor(ndimension);
std::vector<int> HyperCoor(ndimension);
int dim = 0;
for(int l2=0;l2<log2size;l2++){
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
ShmDims[dim]*=2;
dim=(dim+1)%ndimension;
}
Coordinate processor_coor(ndimension);
Coordinate WorldDims = processors;
Coordinate ShmDims (ndimension); Coordinate NodeDims (ndimension);
Coordinate ShmCoor (ndimension); Coordinate NodeCoor (ndimension); Coordinate WorldCoor(ndimension);
Coordinate HyperCoor(ndimension);
GetShmDims(WorldDims,ShmDims);
////////////////////////////////////////////////////////////////
// Establish torus of processes and nodes with sub-blockings
@ -281,27 +306,16 @@ void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processo
}
void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const Coordinate &processors,Grid_MPI_Comm & optimal_comm)
{
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
////////////////////////////////////////////////////////////////
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
assert(log2size != -1);
////////////////////////////////////////////////////////////////
// Identify subblock of ranks on node spreading across dims
// in a maximally symmetrical way
////////////////////////////////////////////////////////////////
int ndimension = processors.size();
Coordinate processor_coor(ndimension);
Coordinate WorldDims = processors; Coordinate ShmDims(ndimension,1); Coordinate NodeDims (ndimension);
Coordinate WorldDims = processors; Coordinate ShmDims(ndimension); Coordinate NodeDims (ndimension);
Coordinate ShmCoor(ndimension); Coordinate NodeCoor(ndimension); Coordinate WorldCoor(ndimension);
int dim = 0;
for(int l2=0;l2<log2size;l2++){
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
ShmDims[dim]*=2;
dim=(dim+1)%ndimension;
}
GetShmDims(WorldDims,ShmDims);
////////////////////////////////////////////////////////////////
// Establish torus of processes and nodes with sub-blockings
////////////////////////////////////////////////////////////////
@ -418,7 +432,14 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
// e.g. DGX1, supermicro board,
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// cudaDeviceGetP2PAttribute(&perfRank, cudaDevP2PAttrPerformanceRank, device1, device2);
cudaSetDevice(WorldShmRank);
#ifdef GRID_IBM_SUMMIT
// IBM Jsrun makes cuda Device numbering screwy and not match rank
std::cout << "IBM Summit or similar - NOT setting device to WorldShmRank"<<std::endl;
#else
std::cout << "setting device to WorldShmRank"<<std::endl;
cudaSetDevice(WorldShmRank);
#endif
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Each MPI rank should allocate our own buffer
///////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -445,7 +466,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
// If it is me, pass around the IPC access key
//////////////////////////////////////////////////
cudaIpcMemHandle_t handle;
if ( r==WorldShmRank ) {
err = cudaIpcGetMemHandle(&handle,ShmCommBuf);
if ( err != cudaSuccess) {
@ -714,6 +735,24 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
std::vector<int> ranks(size); for(int r=0;r<size;r++) ranks[r]=r;
MPI_Group_translate_ranks (FullGroup,size,&ranks[0],ShmGroup, &ShmRanks[0]);
#ifdef GRID_IBM_SUMMIT
// Hide the shared memory path between sockets
// if even number of nodes
if ( (ShmSize & 0x1)==0 ) {
int SocketSize = ShmSize/2;
int mySocket = ShmRank/SocketSize;
for(int r=0;r<size;r++){
int hisRank=ShmRanks[r];
if ( hisRank!= MPI_UNDEFINED ) {
int hisSocket=hisRank/SocketSize;
if ( hisSocket != mySocket ) {
ShmRanks[r] = MPI_UNDEFINED;
}
}
}
}
#endif
SharedMemoryTest();
}
//////////////////////////////////////////////////////////////////

View File

@ -173,13 +173,14 @@ public:
///////////////////////////////////////////////////
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_object scalar_object;
typedef vobj vector_object;
private:
void dealloc(void)
{
alignedAllocator<vobj> alloc;
if( this->_odata_size ) {
alignedAllocator<vobj> alloc;
alloc.deallocate(this->_odata,this->_odata_size);
this->_odata=nullptr;
this->_odata_size=0;
@ -187,15 +188,17 @@ private:
}
void resize(uint64_t size)
{
alignedAllocator<vobj> alloc;
if ( this->_odata_size != size ) {
alignedAllocator<vobj> alloc;
dealloc();
this->_odata_size = size;
if ( size )
this->_odata = alloc.allocate(this->_odata_size);
else
this->_odata = nullptr;
}
this->_odata_size = size;
if ( size )
this->_odata = alloc.allocate(this->_odata_size);
else
this->_odata = nullptr;
}
public:
/////////////////////////////////////////////////////////////////////////////////
@ -346,7 +349,7 @@ public:
void reset(GridBase* grid) {
if (this->_grid != grid) {
this->_grid = grid;
this->_odata.resize(grid->oSites());
this->resize(grid->oSites());
this->checkerboard = 0;
}
}

View File

@ -37,19 +37,18 @@ template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
GridBase *grid = l.Grid();
int Nsimd = grid->iSites();
Coordinate gcoor;
ExtractBuffer<scalar_type> mergebuf(Nsimd);
vector_type vI;
auto l_v = l.View();
for(int o=0;o<grid->oSites();o++){
thread_for( o, grid->oSites(), {
vector_type vI;
Coordinate gcoor;
ExtractBuffer<scalar_type> mergebuf(Nsimd);
for(int i=0;i<grid->iSites();i++){
grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
mergebuf[i]=(Integer)gcoor[mu];
}
merge<vector_type,scalar_type>(vI,mergebuf);
l_v[o]=vI;
}
});
};
// LatticeCoordinate();

View File

@ -156,7 +156,7 @@ void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){
// Peek a scalar object from the SIMD array
//////////////////////////////////////////////////////////
template<class vobj,class sobj>
void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site){
accelerator_inline void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site){
GridBase *grid = l.Grid();
@ -185,7 +185,7 @@ void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site){
};
template<class vobj,class sobj>
void pokeLocalSite(const sobj &s,Lattice<vobj> &l,Coordinate &site){
accelerator_inline void pokeLocalSite(const sobj &s,Lattice<vobj> &l,Coordinate &site){
GridBase *grid=l.Grid();

View File

@ -317,116 +317,6 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
}
}
// Slice-wise inner product of lhs and rhs along dimension orthogdim.
// Runs in two stages: localSliceInnerProductVector fills the per-rank
// partial sums, then globalSliceInnerProductVector combines them across
// ranks and stores the final values in result.
template<class vobj>
static void mySliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
{
  // Scratch storage handed from the local stage to the global stage.
  std::vector<typename vobj::scalar_type> localSliceSums;

  localSliceInnerProductVector(result, lhs, rhs, localSliceSums, orthogdim);
  globalSliceInnerProductVector(result, lhs, localSliceSums, orthogdim);
}
// Rank-local part of a slice inner product.
//
// For every local coordinate along dimension orthogdim, sums
// innerProduct(lhs,rhs) over the sites of the plane orthogonal to that
// dimension and stores the scalar result in lsSum (resized to the local
// extent ld and zeroed here). result is only resized to the full extent fd;
// its values are not written by this routine.
//
// Parameters:
//   result    - resized to grid->_fdimensions[orthogdim]
//   lhs, rhs  - conformable lattices
//   lsSum     - output: one scalar sum per local slice
//   orthogdim - dimension index, asserted to lie in [0, Nd)
template <class vobj>
static void localSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, const Lattice<vobj> &rhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
{
  typedef typename vobj::vector_type vector_type;
  typedef typename vobj::scalar_type scalar_type;

  GridBase *grid = lhs.Grid();
  assert(grid!=nullptr);
  conformable(grid,rhs.Grid());

  const int Nd    = grid->_ndimension;
  const int Nsimd = grid->Nsimd();
  assert(orthogdim >= 0);
  assert(orthogdim < Nd);

  int fd=grid->_fdimensions[orthogdim];
  int ld=grid->_ldimensions[orthogdim];
  int rd=grid->_rdimensions[orthogdim];

  Vector<vector_type> lvSum(rd);                          // will locally sum vectors first
  // assign (not resize) so that a reused lsSum is really reset to zero:
  // resize only value-initialises elements it appends.
  lsSum.assign(ld,scalar_type(0.0));                      // sum across these down to scalars
  ExtractBuffer<iScalar<scalar_type> > extracted(Nsimd);  // splitting the SIMD
  result.resize(fd); // And then global sum to return the same vector to every node for IO to file

  for(int r=0;r<rd;r++){
    lvSum[r]=Zero();
  }

  int e1= grid->_slice_nblock[orthogdim];
  int e2= grid->_slice_block [orthogdim];
  int stride=grid->_slice_stride[orthogdim];

  auto l_v=lhs.View();
  auto r_v=rhs.View();
  thread_for( r,rd,{
    int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
    for(int n=0;n<e1;n++){
      for(int b=0;b<e2;b++){
        int ss = so + n * stride + b;
        // The temporary must live inside the loop body: declared outside,
        // it would be one variable shared by all iterations of thread_for,
        // which is a data race if those iterations run concurrently.
        vector_type vv = TensorRemove(innerProduct(l_v[ss], r_v[ss]));
        lvSum[r] = lvSum[r] + vv;
      }
    }
  });

  // Sum across simd lanes in the plane, breaking out orthog dir.
  Coordinate icoor(Nd);
  for(int rt=0;rt<rd;rt++){
    iScalar<vector_type> temp;
    temp._internal = lvSum[rt];
    extract(temp,extracted);
    for(int idx=0;idx<Nsimd;idx++){
      grid->iCoorFromIindex(icoor,idx);
      // local slice index = reduced index + (inner SIMD coordinate) * rd
      int ldx =rt+icoor[orthogdim]*rd;
      lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
    }
  }
}
// Cross-rank part of a slice inner product.
//
// Places this rank's local slice sums (lsSum) at their global positions in
// a zero-filled vector of length result.size(), sums that vector over ranks
// with grid->GlobalSumVector, and stores the outcome in result.
template <class vobj>
static void globalSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
{
  typedef typename vobj::scalar_type scalar_type;

  GridBase *grid = lhs.Grid();
  const int fullExtent  = result.size();
  const int localExtent = lsSum.size();

  // Every entry starts at zero; only the slices owned by this rank are filled.
  std::vector<scalar_type> gathered(fullExtent, scalar_type(0.0));
  for(int t=0;t<fullExtent;t++){
    const int ownerPlane = t/localExtent; // processor plane
    const int localIndex = t%localExtent;
    if ( ownerPlane != grid->_processor_coor[orthogdim] ) continue;
    gathered[t]=lsSum[localIndex];
  }

  // sum over nodes.
  grid->GlobalSumVector(gathered.data(), fullExtent);

  result = gathered;
}
template<class vobj>
static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
{

View File

@ -1,5 +1,4 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/lattice/Lattice_transfer.h
@ -83,12 +82,35 @@ template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Latti
});
}
// Project fineData onto the nbasis vectors in Basis, one basis vector at a time.
// For each v, blockInnerProduct(ip,Basis[v],fineData) fills the coarse lattice ip,
// and ip is then written into component v of coarseData at every coarse outer site.
//   coarseData - output, lives on the coarse grid
//   fineData   - input, lives on the fine grid
//   Basis      - at least nbasis fine-grid lattices (indexed 0..nbasis-1 here)
template<class vobj,class CComplex,int nbasis>
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
const Lattice<vobj> &fineData,
const std::vector<Lattice<vobj> > &Basis)
{
// NOTE(review): fine is not referenced again in this function.
GridBase * fine = fineData.Grid();
GridBase * coarse= coarseData.Grid();
// Scratch coarse lattice reused for every basis vector.
Lattice<CComplex> ip(coarse);
// auto fineData_ = fineData.View();
auto coarseData_ = coarseData.View();
// NOTE(review): this view is taken before ip is filled below; presumably View()
// refers to ip's storage rather than a snapshot - confirm.
auto ip_ = ip.View();
for(int v=0;v<nbasis;v++) {
blockInnerProduct(ip,Basis[v],fineData);
// Copy the freshly computed inner product into slot v of the coarse vector.
accelerator_for( sc, coarse->oSites(), vobj::Nsimd(), {
coalescedWrite(coarseData_[sc](v),ip_(sc));
});
}
}
template<class vobj,class CComplex,int nbasis>
inline void blockProject1(Lattice<iVector<CComplex,nbasis > > &coarseData,
const Lattice<vobj> &fineData,
const std::vector<Lattice<vobj> > &Basis)
{
typedef iVector<CComplex,nbasis > coarseSiteData;
coarseSiteData elide;
typedef decltype(coalescedRead(elide)) ScalarComplex;
GridBase * fine = fineData.Grid();
GridBase * coarse= coarseData.Grid();
int _ndimension = coarse->_ndimension;
@ -106,26 +128,40 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]);
}
int blockVol = fine->oSites()/coarse->oSites();
coarseData=Zero();
auto fineData_ = fineData.View();
auto coarseData_ = coarseData.View();
// Loop over coars parallel, and then loop over fine associated with coarse.
thread_for( sf, fine->oSites(), {
int sc;
Coordinate coor_c(_ndimension);
Coordinate coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
////////////////////////////////////////////////////////////////////////////////////////////////////////
// To make this lock free, loop over coars parallel, and then loop over fine associated with coarse.
// Otherwise do fine inner product per site, and make the update atomic
////////////////////////////////////////////////////////////////////////////////////////////////////////
accelerator_for( sci, nbasis*coarse->oSites(), vobj::Nsimd(), {
thread_critical {
for(int i=0;i<nbasis;i++) {
auto Basis_ = Basis[i].View();
coarseData_[sc](i)=coarseData_[sc](i) + innerProduct(Basis_[sf],fineData_[sf]);
}
auto sc=sci/nbasis;
auto i=sci%nbasis;
auto Basis_ = Basis[i].View();
Coordinate coor_c(_ndimension);
Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate
int sf;
decltype(innerProduct(Basis_(sf),fineData_(sf))) reduce=Zero();
for(int sb=0;sb<blockVol;sb++){
Coordinate coor_b(_ndimension);
Coordinate coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_b,sb,block_r);
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d]+coor_b[d];
Lexicographic::IndexFromCoor(coor_f,sf,fine->_rdimensions);
reduce=reduce+innerProduct(Basis_(sf),fineData_(sf));
}
coalescedWrite(coarseData_[sc](i),reduce);
});
return;
}
@ -160,7 +196,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
auto fineY_ = fineY.View();
auto coarseA_= coarseA.View();
thread_for(sf, fine->oSites(), {
accelerator_for(sf, fine->oSites(), CComplex::Nsimd(), {
int sc;
Coordinate coor_c(_ndimension);
@ -171,7 +207,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
// z = A x + y
fineZ_[sf]=coarseA_[sc]*fineX_[sf]+fineY_[sf];
coalescedWrite(fineZ_[sf],coarseA_(sc)*fineX_(sf)+fineY_(sf));
});
@ -196,7 +232,7 @@ inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
fine_inner = localInnerProduct(fineX,fineY);
blockSum(coarse_inner,fine_inner);
thread_for(ss, coarse->oSites(),{
accelerator_for(ss, coarse->oSites(), 1, {
CoarseInner_[ss] = coarse_inner_[ss];
});
}
@ -226,23 +262,29 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
for(int d=0 ; d<_ndimension;d++){
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
}
int blockVol = fine->oSites()/coarse->oSites();
// Turn this around to loop threaded over sc and interior loop
// over sf would thread better
coarseData=Zero();
auto coarseData_ = coarseData.View();
auto fineData_ = fineData.View();
thread_for(sf,fine->oSites(),{
int sc;
accelerator_for(sc,coarse->oSites(),1,{
// One thread per sub block
Coordinate coor_c(_ndimension);
Coordinate coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
thread_critical {
Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate
coarseData_[sc]=Zero();
for(int sb=0;sb<blockVol;sb++){
int sf;
Coordinate coor_b(_ndimension);
Coordinate coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_b,sb,block_r); // Block sub coordinate
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d];
Lexicographic::IndexFromCoor(coor_f,sf,fine->_rdimensions);
coarseData_[sc]=coarseData_[sc]+fineData_[sf];
}
@ -296,6 +338,7 @@ inline void blockOrthogonalise(Lattice<CComplex> &ip,std::vector<Lattice<vobj> >
}
}
#if 0
template<class vobj,class CComplex,int nbasis>
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
Lattice<vobj> &fineData,
@ -321,7 +364,7 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
auto coarseData_ = coarseData.View();
// Loop with a cache friendly loop ordering
thread_for(sf,fine->oSites(),{
accelerator_for(sf,fine->oSites(),1,{
int sc;
Coordinate coor_c(_ndimension);
Coordinate coor_f(_ndimension);
@ -332,13 +375,35 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
for(int i=0;i<nbasis;i++) {
auto basis_ = Basis[i].View();
if(i==0) fineData_[sf]=coarseData_[sc](i) *basis_[sf];
else fineData_[sf]=fineData_[sf]+coarseData_[sc](i)*basis_[sf];
if(i==0) fineData_[sf]=coarseData_[sc](i) *basis_[sf]);
else fineData_[sf]=fineData_[sf]+coarseData_[sc](i)*basis_[sf]);
}
});
return;
}
#else
// Build fineData from coarse coefficients and the basis vectors.
// fineData is zeroed, then for each i in 0..nbasis-1 the i-th component of
// coarseData is extracted and passed, with Basis[i] and fineData, to blockZAXPY,
// whose result is stored back into fineData.
//   coarseData - input coefficients on the coarse grid
//   fineData   - output on the fine grid (overwritten)
//   Basis      - at least nbasis fine-grid lattices
template<class vobj,class CComplex,int nbasis>
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
Lattice<vobj> &fineData,
const std::vector<Lattice<vobj> > &Basis)
{
// NOTE(review): fine is not referenced again in this function.
GridBase * fine = fineData.Grid();
GridBase * coarse= coarseData.Grid();
fineData=Zero();
for(int i=0;i<nbasis;i++) {
// Component i of the coarse vector, still wrapped in iScalar.
Lattice<iScalar<CComplex> > ip = PeekIndex<0>(coarseData,i);
// Same values with the iScalar wrapper removed, as a plain CComplex lattice.
Lattice<CComplex> cip(coarse);
auto cip_ = cip.View();
auto ip_ = ip.View();
// NOTE(review): accelerator_forNB looks like a non-blocking launch and cip is
// consumed by blockZAXPY immediately afterwards - confirm ordering is guaranteed.
accelerator_forNB(sc,coarse->oSites(),CComplex::Nsimd(),{
coalescedWrite(cip_[sc], ip_(sc)());
});
// presumably fineData = cip * Basis[i] + fineData (z = A x + y); verify against blockZAXPY
blockZAXPY<vobj,CComplex >(fineData,cip,Basis[i],fineData);
}
}
#endif
// Useful for precision conversion, or indeed anything where an operator= does a conversion on scalars.
// Simd layouts need not match since we use peek/poke Local
@ -374,6 +439,67 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
});
}
// Copy a rectangular region of sites from From into To.
// A local site of From at coordinate Fcoor is copied when, in every dimension d,
// FromLowerLeft[d] <= Fcoor[d] < FromLowerLeft[d]+RegionSize[d]; it lands in To at
// ToLowerLeft[d] + Fcoor[d] - FromLowerLeft[d].
// Asserted preconditions: neither grid is checkerboarded, both grids have the same
// number of dimensions and the same _processors layout in every dimension.
// NOTE(review): no check is visible that the target coordinate lies inside To's
// local volume - confirm callers guarantee it.
template<class vobj>
void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate FromLowerLeft, Coordinate ToLowerLeft, Coordinate RegionSize)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
// Number of vector_type words making up one vobj.
static const int words=sizeof(vobj)/sizeof(vector_type);
GridBase *Fg = From.Grid();
GridBase *Tg = To.Grid();
assert(!Fg->_isCheckerBoarded);
assert(!Tg->_isCheckerBoarded);
int Nsimd = Fg->Nsimd();
int nF = Fg->_ndimension;
int nT = Tg->_ndimension;
int nd = nF;
assert(nF == nT);
for(int d=0;d<nd;d++){
assert(Fg->_processors[d] == Tg->_processors[d]);
}
// the above should guarantee that the operations are local
// Copies of the grid layout tables used inside the accelerator loop.
Coordinate ldf = Fg->_ldimensions;
Coordinate rdf = Fg->_rdimensions;
Coordinate isf = Fg->_istride;
Coordinate osf = Fg->_ostride;
Coordinate rdt = Tg->_rdimensions;
Coordinate ist = Tg->_istride;
Coordinate ost = Tg->_ostride;
auto t_v = To.View();
auto f_v = From.View();
// One iteration per local site of the source grid.
accelerator_for(idx,Fg->lSites(),1,{
// s is only used by the commented-out peek/poke variant at the end of the loop body.
sobj s;
Coordinate Fcoor(nd);
Coordinate Tcoor(nd);
Lexicographic::CoorFromIndex(Fcoor,idx,ldf);
int in_region=1;
for(int d=0;d<nd;d++){
if ( (Fcoor[d] < FromLowerLeft[d]) || (Fcoor[d]>=FromLowerLeft[d]+RegionSize[d]) ){
in_region=0;
}
Tcoor[d] = ToLowerLeft[d]+ Fcoor[d]-FromLowerLeft[d];
}
if (in_region) {
// Split each local coordinate into an inner index (coor / rdimension, weighted by
// _istride) and an outer index (coor % rdimension, weighted by _ostride).
Integer idx_f = 0; for(int d=0;d<nd;d++) idx_f+=isf[d]*(Fcoor[d]/rdf[d]);
Integer idx_t = 0; for(int d=0;d<nd;d++) idx_t+=ist[d]*(Tcoor[d]/rdt[d]);
Integer odx_f = 0; for(int d=0;d<nd;d++) odx_f+=osf[d]*(Fcoor[d]%rdf[d]);
Integer odx_t = 0; for(int d=0;d<nd;d++) odx_t+=ost[d]*(Tcoor[d]%rdt[d]);
// Treat the outer-site objects as flat arrays of scalars.
scalar_type * fp = (scalar_type *)&f_v[odx_f];
scalar_type * tp = (scalar_type *)&t_v[odx_t];
// Consecutive words of one site's element are Nsimd scalars apart in this view.
for(int w=0;w<words;w++){
tp[idx_t+w*Nsimd] = fp[idx_f+w*Nsimd]; // FIXME: with an RRII layout this type pun will not work
}
// peekLocalSite(s,From,Fcoor);
// pokeLocalSite(s,To ,Tcoor);
}
});
}
template<class vobj>
void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog)

View File

@ -354,6 +354,6 @@ public:
}
};
NAMESPACE_END(QCD);
NAMESPACE_END(Grid);
#endif

View File

@ -44,8 +44,13 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <sys/syscall.h>
#endif
#ifdef __x86_64__
#ifdef GRID_NVCC
accelerator_inline uint64_t __rdtsc(void) { return 0; }
accelerator_inline uint64_t __rdpmc(int ) { return 0; }
#else
#include <x86intrin.h>
#endif
#endif
NAMESPACE_BEGIN(Grid);
@ -89,13 +94,8 @@ inline uint64_t cyclecount(void){
return tmp;
}
#elif defined __x86_64__
#ifdef GRID_NVCC
accelerator_inline uint64_t __rdtsc(void) { return 0; }
#endif
inline uint64_t cyclecount(void){
return __rdtsc();
// unsigned int dummy;
// return __rdtscp(&dummy);
}
#else

View File

@ -101,7 +101,8 @@ public:
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out);
void Meooe5D (const FermionField &in, FermionField &out);
void MeooeDag5D (const FermionField &in, FermionField &out);

View File

@ -62,14 +62,15 @@ public:
// Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out);
///////////////////////////////////////////////////////////////
// Physical surface field utilities
///////////////////////////////////////////////////////////////
// virtual void Dminus(const FermionField &psi, FermionField &chi); // Inherit trivial case
// virtual void DminusDag(const FermionField &psi, FermionField &chi); // Inherit trivial case
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
///////////////////////////////////////////////////////////////
// Physical surface field utilities
///////////////////////////////////////////////////////////////
// virtual void Dminus(const FermionField &psi, FermionField &chi); // Inherit trivial case
// virtual void DminusDag(const FermionField &psi, FermionField &chi); // Inherit trivial case
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
// Constructors
ContinuedFractionFermion5D(GaugeField &_Umu,

View File

@ -80,7 +80,7 @@ public:
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
}
//phase for boundary condition
out = out * exp(Scalar(2.0*M_PI)*ci*ph);
out = out * exp(ci*ph);
};
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist) {

View File

@ -89,6 +89,7 @@ public:
virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};

View File

@ -103,6 +103,7 @@ public:
// Multigrid assistance; force term uses too
///////////////////////////////////////////////////////////////
void Mdir(const FermionField &in, FermionField &out, int dir, int disp);
void MdirAll(const FermionField &in, std::vector<FermionField> &out);
void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);
///////////////////////////////////////////////////////////////

View File

@ -86,7 +86,8 @@ public:
void MooeeDag (const FermionField &in, FermionField &out);
void MooeeInvDag (const FermionField &in, FermionField &out);
void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
void MdirAll(const FermionField &in, std::vector<FermionField> &out);
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
// These can be overridden by fancy 5d chiral action

View File

@ -67,12 +67,13 @@ public:
// Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out);
///////////////////////////////////////////////////////////////
// Physical surface field utilities
///////////////////////////////////////////////////////////////
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
///////////////////////////////////////////////////////////////
// Physical surface field utilities
///////////////////////////////////////////////////////////////
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
// Constructors
PartialFractionFermion5D(GaugeField &_Umu,

View File

@ -115,9 +115,10 @@ public:
// Multigrid assistance; force term uses too
///////////////////////////////////////////////////////////////
void Mdir(const FermionField &in, FermionField &out, int dir, int disp);
void MdirAll(const FermionField &in, std::vector<FermionField> &out);
void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);
void DhopDirDisp(const FermionField &in, FermionField &out, int dirdisp,
int gamma, int dag);
void DhopDirAll(const FermionField &in, std::vector<FermionField> &out);
void DhopDirCalc(const FermionField &in, FermionField &out, int dirdisp,int gamma, int dag);
///////////////////////////////////////////////////////////////
// Extra methods added by derived

View File

@ -111,15 +111,16 @@ public:
virtual void MooeeDag (const FermionField &in, FermionField &out){assert(0);};
virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
// These can be overridden by fancy 5d chiral action
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
// Implement hopping term non-hermitian hopping term; half cb or both
// Implement s-diagonal DW
@ -131,6 +132,9 @@ public:
// add a DhopComm
// -- suboptimal interface will presently trigger multiple comms.
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
void DhopDirAll(const FermionField &in,std::vector<FermionField> &out);
void DhopDirComms(const FermionField &in);
void DhopDirCalc(const FermionField &in, FermionField &out,int point);
///////////////////////////////////////////////////////////////
// New methods added

View File

@ -60,6 +60,9 @@ public:
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ;
static void DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
int Nsite, const FermionField &in, std::vector<FermionField> &out) ;
static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma);
@ -100,8 +103,17 @@ public:
private:
static accelerator void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
static accelerator_inline void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
static accelerator_inline void DhopDirXp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirYp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirZp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirTp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirXm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirYm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirZm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirTm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
// Specialised variants
static accelerator void GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,

View File

@ -54,6 +54,14 @@ public:
_Mat.Mdir(in,tmp,dir,disp);
G5R5(out,tmp);
}
void OpDirAll(const Field &in, std::vector<Field> &out) {
Field tmp(in.Grid());
_Mat.MdirAll(in,out);
for(int p=0;p<out.size();p++) {
tmp=out[p];
G5R5(out[p],tmp);
}
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
@ -96,6 +104,12 @@ public:
_Mat.Mdir(in,tmp,dir,disp);
out=g5*tmp;
}
void OpDirAll(const Field &in, std::vector<Field> &out) {
_Mat.MdirAll(in,out);
for(int p=0;p<out.size();p++) {
out[p]=g5*out[p];
}
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){

View File

@ -383,11 +383,20 @@ void CayleyFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &
}
template<class Impl>
void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
Meo5D(psi,this->tmp());
// Apply 4d dslash fragment
this->DhopDir(this->tmp(),chi,dir,disp);
void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp)
{
FermionField tmp(psi.Grid());
Meo5D(psi,tmp);
this->DhopDir(tmp,chi,dir,disp);
}
// All-directions counterpart of Mdir: applies Meo5D to psi into a
// temporary field and hands that field to DhopDirAll, which fills out.
template<class Impl>
void CayleyFermion5D<Impl>::MdirAll(const FermionField &psi, std::vector<FermionField> &out)
{
  FermionField meoPsi(psi.Grid());   // temporary on the same grid as psi
  Meo5D(psi,meoPsi);
  this->DhopDirAll(meoPsi,out);
}
// force terms; five routines; default to Dhop on diagonal
template<class Impl>
void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)

View File

@ -10,6 +10,7 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Gianluca Filaci <g.filaci@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -54,6 +55,10 @@ CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
auto pdiag = &diag[0];
auto pupper = &upper[0];
auto plower = &lower[0];
int Ls =this->Ls;
// 10 = 3 complex mult + 2 complex add
@ -71,7 +76,7 @@ CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5m(tmp1,psi(idx_u));
spProj5p(tmp2,psi(idx_l));
coalescedWrite(chi[ss+s],diag[s]*phi(ss+s)+upper[s]*tmp1+lower[s]*tmp2);
coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
}
});
M5Dtime+=usecond();
@ -93,6 +98,10 @@ CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
auto pdiag = &diag[0];
auto pupper = &upper[0];
auto plower = &lower[0];
int Ls=this->Ls;
// Flops = 6.0*(Nc*Ns) *Ls*vol
@ -109,7 +118,7 @@ CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5p(tmp1,psi(idx_u));
spProj5m(tmp2,psi(idx_l));
coalescedWrite(chi[ss+s],diag[s]*phi(ss+s)+upper[s]*tmp1+lower[s]*tmp2);
coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
}
});
M5Dtime+=usecond();
@ -139,39 +148,41 @@ CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi_i, FermionField &chi
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp;
spinor tmp, acc, res;
// flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops
// Apply (L^{\prime})^{-1}
coalescedWrite(chi[ss],psi(ss)); // chi[0]=psi[0]
for(int s=1;s<Ls;s++){
spProj5p(tmp,chi(ss+s-1));
coalescedWrite(chi[ss+s] , psi(ss+s)-plee[s-1]*tmp);
// X = Nc*Ns
// flops = 2X + (Ls-2)(4X + 4X) + 6X + 1 + 2X + (Ls-1)(10X + 1) = -16X + Ls(1+18X) = -192 + 217*Ls flops
// Apply (L^{\prime})^{-1} L_m^{-1}
res = psi(ss);
spProj5m(tmp,res);
acc = pleem[0]*tmp;
spProj5p(tmp,res);
coalescedWrite(chi[ss],res);
for(int s=1;s<Ls-1;s++){
res = psi(ss+s);
res -= plee[s-1]*tmp;
spProj5m(tmp,res);
acc += pleem[s]*tmp;
spProj5p(tmp,res);
coalescedWrite(chi[ss+s],res);
}
// L_m^{-1}
for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -pleem[s]P_- chi
spProj5m(tmp,chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pleem[s]*tmp);
}
// U_m^{-1} D^{-1}
for (int s=0;s<Ls-1;s++){
// Chi[s] + 1/d chi[s]
spProj5p(tmp,chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s)-(pueem[s]/pdee[Ls-1])*tmp);
}
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
// Apply U^{-1}
res = psi(ss+Ls-1) - plee[Ls-2]*tmp - acc;
// Apply U_m^{-1} D^{-1} U^{-1}
res = (1.0/pdee[Ls-1])*res;
coalescedWrite(chi[ss+Ls-1],res);
spProj5p(acc,res);
spProj5m(tmp,res);
for (int s=Ls-2;s>=0;s--){
spProj5m(tmp,chi(ss+s+1));
coalescedWrite(chi[ss+s], chi(ss+s) - puee[s]*tmp);
res = (1.0/pdee[s])*chi(ss+s) - puee[s]*tmp - pueem[s]*acc;
spProj5m(tmp,res);
coalescedWrite(chi[ss+s],res);
}
});
MooeeInvTime+=usecond();
}
template<class Impl>
@ -201,31 +212,36 @@ CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi_i, FermionField &chi
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp;
spinor tmp, acc, res;
// Apply (U^{\prime})^{-dagger}
coalescedWrite(chi[ss],psi(ss));
for (int s=1;s<Ls;s++){
spProj5m(tmp,chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s)-conjugate(puee[s-1])*tmp);
// X = Nc*Ns
// flops = 2X + (Ls-2)(4X + 4X) + 6X + 1 + 2X + (Ls-1)(10X + 1) = -16X + Ls(1+18X) = -192 + 217*Ls flops
// Apply (U^{\prime})^{-dagger} U_m^{-\dagger}
res = psi(ss);
spProj5p(tmp,res);
acc = conjugate(pueem[0])*tmp;
spProj5m(tmp,res);
coalescedWrite(chi[ss],res);
for(int s=1;s<Ls-1;s++){
res = psi(ss+s);
res -= conjugate(puee[s-1])*tmp;
spProj5p(tmp,res);
acc += conjugate(pueem[s])*tmp;
spProj5m(tmp,res);
coalescedWrite(chi[ss+s],res);
}
// U_m^{-\dagger}
for (int s=0;s<Ls-1;s++){
spProj5p(tmp,chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - conjugate(pueem[s])*tmp);
}
// L_m^{-\dagger} D^{-dagger}
for (int s=0;s<Ls-1;s++){
spProj5m(tmp,chi(ss+Ls-1));
coalescedWrite(chi[ss+s], conjugate(1.0/pdee[s])*chi(ss+s)-conjugate(pleem[s]/pdee[Ls-1])*tmp);
}
coalescedWrite(chi[ss+Ls-1], conjugate(1.0/pdee[Ls-1])*chi(ss+Ls-1));
// Apply L^{-dagger}
res = psi(ss+Ls-1) - conjugate(puee[Ls-2])*tmp - acc;
// Apply L_m^{-\dagger} D^{-dagger} L^{-dagger}
res = conjugate(1.0/pdee[Ls-1])*res;
coalescedWrite(chi[ss+Ls-1],res);
spProj5m(acc,res);
spProj5p(tmp,res);
for (int s=Ls-2;s>=0;s--){
spProj5p(tmp,chi(ss+s+1));
coalescedWrite(chi[ss+s], chi(ss+s) - conjugate(plee[s])*tmp);
res = conjugate(1.0/pdee[s])*chi(ss+s) - conjugate(plee[s])*tmp - conjugate(pleem[s])*acc;
spProj5p(tmp,res);
coalescedWrite(chi[ss+s],res);
}
});
MooeeInvTime+=usecond();

View File

@ -143,6 +143,25 @@ void ContinuedFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionFi
}
}
// Multi-direction version of Mdir.
// DhopDirAll fills every entry of `chi`; each entry is then rescaled in place,
// one fifth-dimension slice at a time, through ag5xpby_ssp:
//   - slices s < Ls-1 use cc[s]*Beta[s]*ZoloHiInv with a sign that alternates
//     (+ on even s, - on odd s),
//   - the last slice s == Ls-1 uses Beta[s]*ZoloHiInv with no sign and no cc.
template<class Impl>
void ContinuedFractionFermion5D<Impl>::MdirAll (const FermionField &psi, std::vector<FermionField> &chi)
{
  const int Ls = this->Ls;

  this->DhopDirAll(psi,chi); // Dslash on diagonal. g5 Dslash is hermitian

  for (auto &leg : chi) {
    for (int s = 0; s < Ls; ++s) {
      if ( s != (Ls-1) ) {
        // Alternating sign, starting at +1 on slice 0.
        const int sign = (s % 2 == 0) ? 1 : -1;
        ag5xpby_ssp(leg,cc[s]*Beta[s]*sign*ZoloHiInv,leg,0.0,leg,s,s);
      } else {
        ag5xpby_ssp(leg,Beta[s]*ZoloHiInv,leg,0.0,leg,s,s);
      }
    }
  }
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
{
int Ls = this->Ls;

View File

@ -11,6 +11,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
Author: Gianluca Filaci <g.filaci@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -49,6 +50,9 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionFi
auto psi = psi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
auto pdiag = &diag[0];
auto pupper = &upper[0];
auto plower = &lower[0];
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
@ -63,7 +67,7 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionFi
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5m(tmp1, psi(idx_u));
spProj5p(tmp2, psi(idx_l));
coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
coalescedWrite(chi[ss+s], pdiag[s]*phi(ss+s) + pupper[s]*tmp1 + plower[s]*tmp2);
}
});
@ -82,6 +86,9 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const Fermio
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
auto pdiag = &diag[0];
auto pupper = &upper[0];
auto plower = &lower[0];
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
@ -97,7 +104,7 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const Fermio
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5p(tmp1, psi(idx_u));
spProj5m(tmp2, psi(idx_l));
coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
coalescedWrite(chi[ss+s], pdiag[s]*phi(ss+s) + pupper[s]*tmp1 + plower[s]*tmp2);
}
});
@ -124,36 +131,37 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi_i, FermionFie
this->MooeeInvTime -= usecond();
uint64_t nloop=grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
auto ss=sss*Ls;
uint64_t ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1,tmp2;
spinor tmp, acc, res;
// flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops
// Apply (L^{\prime})^{-1}
coalescedWrite(chi[ss],psi(ss)); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
spProj5p(tmp1, chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s) - plee[s-1]*tmp1);
// Apply (L^{\prime})^{-1} L_m^{-1}
res = psi(ss);
spProj5m(tmp,res);
acc = pleem[0]*tmp;
spProj5p(tmp,res);
coalescedWrite(chi[ss],res);
for(int s=1;s<Ls-1;s++){
res = psi(ss+s);
res -= plee[s-1]*tmp;
spProj5m(tmp,res);
acc += pleem[s]*tmp;
spProj5p(tmp,res);
coalescedWrite(chi[ss+s],res);
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp1, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pleem[s]*tmp1);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pueem[s]/pdee[Ls])*tmp1);
}
spProj5m(tmp2, chi(ss+Ls-1));
coalescedWrite(chi[ss+Ls-1],(1.0/pdee[Ls])*tmp1 + (1.0/pdee[Ls-1])*tmp2);
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
spProj5m(tmp1, chi(ss+s+1));
coalescedWrite(chi[ss+s], chi(ss+s) - puee[s]*tmp1);
res = psi(ss+Ls-1) - plee[Ls-2]*tmp - acc;
// Apply U_m^{-1} D^{-1} U^{-1}
acc = (1.0/pdee[Ls ])*res;
tmp = (1.0/pdee[Ls-1])*res;
spProj5p(acc,acc);
spProj5m(tmp,tmp);
coalescedWrite(chi[ss+Ls-1], acc + tmp);
for (int s=Ls-2;s>=0;s--){
res = (1.0/pdee[s])*chi(ss+s) - puee[s]*tmp - pueem[s]*acc;
spProj5m(tmp,res);
coalescedWrite(chi[ss+s],res);
}
});
this->MooeeInvTime += usecond();
@ -168,56 +176,50 @@ void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi_i, Fermion
auto chi = chi_i.View();
int Ls = this->Ls;
auto plee = & this->lee[0];
auto pdee = & this->dee[0];
auto puee = & this->uee[0];
auto pleem = & this->leem[0];
auto pueem = & this->ueem[0];
assert(psi.Checkerboard() == psi.Checkerboard());
Vector<Coeff_t> ueec(Ls);
Vector<Coeff_t> deec(Ls+1);
Vector<Coeff_t> leec(Ls);
Vector<Coeff_t> ueemc(Ls);
Vector<Coeff_t> leemc(Ls);
for(int s=0; s<ueec.size(); s++){
ueec[s] = conjugate(this->uee[s]);
deec[s] = conjugate(this->dee[s]);
leec[s] = conjugate(this->lee[s]);
ueemc[s] = conjugate(this->ueem[s]);
leemc[s] = conjugate(this->leem[s]);
}
deec[Ls] = conjugate(this->dee[Ls]);
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
auto nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1,tmp2;
auto ss=sss*Ls;
spinor tmp, acc, res;
// Apply (U^{\prime})^{-dagger}
coalescedWrite(chi[ss], psi(ss));
for(int s=1; s<Ls; s++){
spProj5m(tmp1, chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s) - ueec[s-1]*tmp1);
// Apply (U^{\prime})^{-dagger} U_m^{-\dagger}
res = psi(ss);
spProj5p(tmp,res);
acc = conjugate(pueem[0])*tmp;
spProj5m(tmp,res);
coalescedWrite(chi[ss],res);
for(int s=1;s<Ls-1;s++){
res = psi(ss+s);
res -= conjugate(puee[s-1])*tmp;
spProj5p(tmp,res);
acc += conjugate(pueem[s])*tmp;
spProj5m(tmp,res);
coalescedWrite(chi[ss+s],res);
}
// U_m^{-\dagger}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp1, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - ueemc[s]*tmp1);
}
// L_m^{-\dagger} D^{-dagger}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+s] ,(1.0/deec[s])*chi(ss+s) - (leemc[s]/deec[Ls-1])*tmp1);
}
spProj5p(tmp2, chi(ss+Ls-1));
coalescedWrite(chi[ss+Ls-1], (1.0/deec[Ls-1])*tmp1 + (1.0/deec[Ls])*tmp2);
// Apply L^{-dagger}
for(int s=Ls-2; s>=0; s--){
spProj5p(tmp1, chi(ss+s+1));
coalescedWrite(chi[ss+s],chi(ss+s) - leec[s]*tmp1);
res = psi(ss+Ls-1) - conjugate(puee[Ls-2])*tmp - acc;
// Apply L_m^{-\dagger} D^{-dagger} L^{-dagger}
acc = conjugate(1.0/pdee[Ls-1])*res;
tmp = conjugate(1.0/pdee[Ls ])*res;
spProj5m(acc,acc);
spProj5p(tmp,tmp);
coalescedWrite(chi[ss+Ls-1], acc + tmp);
for (int s=Ls-2;s>=0;s--){
res = conjugate(1.0/pdee[s])*chi(ss+s) - conjugate(plee[s])*tmp - conjugate(pleem[s])*acc;
spProj5p(tmp,res);
coalescedWrite(chi[ss+s],res);
}
});

View File

@ -538,10 +538,16 @@ void ImprovedStaggeredFermion5D<Impl>::ZeroCounters(void)
// Implement the general interface. Here we use SAME mass on all slices
/////////////////////////////////////////////////////////////////////////
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) {
void ImprovedStaggeredFermion5D<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)
{
DhopDir(in, out, dir, disp);
}
// MdirAll has no implementation for this action: the body is an unconditional
// assert(0), so neither `in` nor `out` is touched.
// NOTE(review): when NDEBUG is defined assert() expands to nothing, in which
// case this returns silently with `out` unmodified -- confirm no caller relies
// on reaching this.
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)
{
assert(0);
}
template <class Impl>
RealD ImprovedStaggeredFermion5D<Impl>::M(const FermionField &in, FermionField &out) {
out.Checkerboard() = in.Checkerboard();
Dhop(in, out, DaggerNo);

View File

@ -362,12 +362,19 @@ void ImprovedStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) {
void ImprovedStaggeredFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)
{
DhopDir(in, out, dir, disp);
}
// MdirAll placeholder: the body is an unconditional assert(0), so neither `in`
// nor `out` is touched.
// NOTE(review): when NDEBUG is defined assert() expands to nothing, in which
// case this returns silently with `out` unmodified -- confirm no caller relies
// on reaching this.
template <class Impl>
void ImprovedStaggeredFermion<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)
{
assert(0); // Not implemented yet
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) {
void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)
{
Compressor compressor;
Stencil.HaloExchange(in, compressor);
@ -380,6 +387,7 @@ void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionFiel
});
};
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
@ -404,7 +412,6 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
#ifdef GRID_OMP
Compressor compressor;
int len = U.Grid()->oSites();
const int LLs = 1;
DhopTotalTime -= usecond();

View File

@ -11,6 +11,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
Author: Gianluca Filaci <g.filaci@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -49,6 +50,10 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi_i, const FermionField
assert(phi.Checkerboard() == psi.Checkerboard());
auto pdiag = &diag[0];
auto pupper = &upper[0];
auto plower = &lower[0];
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
@ -64,7 +69,7 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi_i, const FermionField
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5m(tmp1, psi(idx_u));
spProj5p(tmp2, psi(idx_l));
coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
coalescedWrite(chi[ss+s], pdiag[s]*phi(ss+s) + pupper[s]*tmp1 + plower[s]*tmp2);
}
});
@ -88,6 +93,11 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const Fermion
assert(phi.Checkerboard() == psi.Checkerboard());
auto pdiag = &diag[0];
auto pupper = &upper[0];
auto plower = &lower[0];
auto pshift_coeffs = &shift_coeffs[0];
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
@ -108,7 +118,7 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const Fermion
if(pm == 1){ spProj5p(tmp, psi(ss+shift_s)); }
else { spProj5m(tmp, psi(ss+shift_s)); }
coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 +lower[s]*tmp2 + shift_coeffs[s]*tmp);
coalescedWrite(chi[ss+s], pdiag[s]*phi(ss+s) + pupper[s]*tmp1 +plower[s]*tmp2 + pshift_coeffs[s]*tmp);
}
});
@ -128,6 +138,10 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi_i, const FermionFie
assert(phi.Checkerboard() == psi.Checkerboard());
auto pdiag = &diag[0];
auto pupper = &upper[0];
auto plower = &lower[0];
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
@ -144,7 +158,7 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi_i, const FermionFie
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5p(tmp1, psi(idx_u));
spProj5m(tmp2, psi(idx_l));
coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
coalescedWrite(chi[ss+s], pdiag[s]*phi(ss+s) + pupper[s]*tmp1 + plower[s]*tmp2);
}
});
@ -166,6 +180,11 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const Ferm
assert(phi.Checkerboard() == psi.Checkerboard());
auto pdiag = &diag[0];
auto pupper = &upper[0];
auto plower = &lower[0];
auto pshift_coeffs = &shift_coeffs[0];
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
@ -189,12 +208,12 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const Ferm
spProj5p(tmp1, psi(idx_u));
spProj5m(tmp2, psi(idx_l));
if(s==(Ls-1)) coalescedWrite(chi[ss+s], chi(ss+s)+ diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
else coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
if(s==(Ls-1)) coalescedWrite(chi[ss+s], chi(ss+s)+ pdiag[s]*phi(ss+s) + pupper[s]*tmp1 + plower[s]*tmp2);
else coalescedWrite(chi[ss+s], pdiag[s]*phi(ss+s) + pupper[s]*tmp1 + plower[s]*tmp2);
if(pm == 1){ spProj5p(tmp, psi(ss+s)); }
else { spProj5m(tmp, psi(ss+s)); }
coalescedWrite(chi[ss+shift_s],chi(ss+shift_s)+shift_coeffs[s]*tmp);
coalescedWrite(chi[ss+shift_s],chi(ss+shift_s)+pshift_coeffs[s]*tmp);
}
});
@ -223,36 +242,38 @@ void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField &
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss = sss*Ls;
uint64_t ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp;
spinor tmp, acc, res;
// Apply (L^{\prime})^{-1}
coalescedWrite(chi[ss], psi(ss)); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
spProj5p(tmp, chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s) - plee[s-1]*tmp);
// X = Nc*Ns
// flops = 2X + (Ls-2)(4X + 4X) + 6X + 1 + 2X + (Ls-1)(10X + 1) = -16X + Ls(1+18X) = -192 + 217*Ls flops
// Apply (L^{\prime})^{-1} L_m^{-1}
res = psi(ss);
spProj5m(tmp,res);
acc = pleem[0]*tmp;
spProj5p(tmp,res);
coalescedWrite(chi[ss],res);
for(int s=1;s<Ls-1;s++){
res = psi(ss+s);
res -= plee[s-1]*tmp;
spProj5m(tmp,res);
acc += pleem[s]*tmp;
spProj5p(tmp,res);
coalescedWrite(chi[ss+s],res);
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pleem[s]*tmp);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pueem[s]/pdee[Ls-1])*tmp);
}
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
spProj5m(tmp, chi(ss+s+1));
coalescedWrite(chi[ss+s], chi(ss+s) - puee[s]*tmp);
res = psi(ss+Ls-1) - plee[Ls-2]*tmp - acc;
// Apply U_m^{-1} D^{-1} U^{-1}
res = (1.0/pdee[Ls-1])*res;
coalescedWrite(chi[ss+Ls-1],res);
spProj5p(acc,res);
spProj5m(tmp,res);
for (int s=Ls-2;s>=0;s--){
res = (1.0/pdee[s])*chi(ss+s) - puee[s]*tmp - pueem[s]*acc;
spProj5m(tmp,res);
coalescedWrite(chi[ss+s],res);
}
});
@ -281,45 +302,45 @@ void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionF
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp, acc, res, tmp_spProj;
uint64_t ss = sss*Ls;
// Apply (L^{\prime})^{-1} L_m^{-1}
res = psi(ss);
spProj5m(tmp,res);
acc = pleem[0]*tmp;
spProj5p(tmp,res);
coalescedWrite(chi[ss],res);
tmp_spProj = pMooeeInv_shift_lc[0]*res;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1,tmp2,tmp2_spProj;
for(int s=1;s<Ls-1;s++){
res = psi(ss+s);
tmp_spProj += pMooeeInv_shift_lc[s]*res;
res -= plee[s-1]*tmp;
spProj5m(tmp,res);
acc += pleem[s]*tmp;
spProj5p(tmp,res);
coalescedWrite(chi[ss+s],res);
}
res = psi(ss+Ls-1);
// Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2
coalescedWrite(chi[ss], psi(ss)); // chi[0]=psi[0]
tmp2 = pMooeeInv_shift_lc[0]*psi(ss);
for(int s=1; s<Ls; s++){
spProj5p(tmp1, chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s) - plee[s-1]*tmp1);
tmp2 = tmp2 + pMooeeInv_shift_lc[s]*psi(ss+s);
}
if(pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else { spProj5m(tmp2_spProj, tmp2); }
tmp_spProj += pMooeeInv_shift_lc[Ls-1]*res;
if(pm == 1){ spProj5p(tmp_spProj, tmp_spProj);}
else { spProj5m(tmp_spProj, tmp_spProj); }
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp1, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pleem[s]*tmp1);
}
res = res - plee[Ls-2]*tmp - acc;
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pueem[s]/pdee[Ls-1])*tmp1);
}
// chi[ss+Ls-1] = (1.0/pdee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
spProj5m(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) + pMooeeInv_shift_norm[Ls-1]*tmp2_spProj);
// Apply U^{-1} and add shift term
for(int s=Ls-2; s>=0; s--){
coalescedWrite(chi[ss+s] , chi(ss+s) - puee[s]*tmp1);
spProj5m(tmp1, chi(ss+s));
coalescedWrite(chi[ss+s], chi(ss+s) + pMooeeInv_shift_norm[s]*tmp2_spProj);
}
// Apply U_m^{-1} D^{-1} U^{-1}
res = (1.0/pdee[Ls-1])*res;
spProj5p(acc,res);
spProj5m(tmp,res);
coalescedWrite(chi[ss+Ls-1], res + pMooeeInv_shift_norm[Ls-1]*tmp_spProj);
for (int s=Ls-2;s>=0;s--){
res = (1.0/pdee[s])*chi(ss+s) - puee[s]*tmp - pueem[s]*acc;
spProj5m(tmp,res);
coalescedWrite(chi[ss+s], res + pMooeeInv_shift_norm[s]*tmp_spProj);
}
});
this->MooeeInvTime += usecond();
@ -347,39 +368,40 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionFiel
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss = sss*Ls;
uint64_t ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp;
spinor tmp, acc, res;
// Apply (U^{\prime})^{-dag}
coalescedWrite(chi[ss], psi(ss));
for(int s=1; s<Ls; s++){
spProj5m(tmp, chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s) - puee[s-1]*tmp);
}
// X = Nc*Ns
// flops = 2X + (Ls-2)(4X + 4X) + 6X + 1 + 2X + (Ls-1)(10X + 1) = -16X + Ls(1+18X) = -192 + 217*Ls flops
// Apply (U^{\prime})^{-dagger} U_m^{-\dagger}
res = psi(ss);
spProj5p(tmp,res);
acc = pueem[0]*tmp;
spProj5m(tmp,res);
coalescedWrite(chi[ss],res);
// U_m^{-\dag}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pueem[s]*tmp);
for(int s=1;s<Ls-1;s++){
res = psi(ss+s);
res -= puee[s-1]*tmp;
spProj5p(tmp,res);
acc += pueem[s]*tmp;
spProj5m(tmp,res);
coalescedWrite(chi[ss+s],res);
}
// L_m^{-\dag} D^{-dag}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pleem[s]/pdee[Ls-1])*tmp);
}
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
// Apply L^{-dag}
for(int s=Ls-2; s>=0; s--){
spProj5p(tmp, chi(ss+s+1));
coalescedWrite(chi[ss+s], chi(ss+s) - plee[s]*tmp);
res = psi(ss+Ls-1) - puee[Ls-2]*tmp - acc;
// Apply L_m^{-\dagger} D^{-dagger} L^{-dagger}
res = (1.0/pdee[Ls-1])*res;
coalescedWrite(chi[ss+Ls-1],res);
spProj5m(acc,res);
spProj5p(tmp,res);
for (int s=Ls-2;s>=0;s--){
res = (1.0/pdee[s])*chi(ss+s) - plee[s]*tmp - pleem[s]*acc;
spProj5p(tmp,res);
coalescedWrite(chi[ss+s],res);
}
});
this->MooeeInvTime += usecond();
}
@ -406,45 +428,45 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, Fermi
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp, acc, res, tmp_spProj;
uint64_t ss = sss*Ls;
// Apply (U^{\prime})^{-dagger} U_m^{-\dagger}
res = psi(ss);
spProj5p(tmp,res);
acc = pueem[0]*tmp;
spProj5m(tmp,res);
coalescedWrite(chi[ss],res);
tmp_spProj = pMooeeInvDag_shift_lc[0]*res;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1,tmp2,tmp2_spProj;
for(int s=1;s<Ls-1;s++){
res = psi(ss+s);
tmp_spProj += pMooeeInvDag_shift_lc[s]*res;
res -= puee[s-1]*tmp;
spProj5p(tmp,res);
acc += pueem[s]*tmp;
spProj5m(tmp,res);
coalescedWrite(chi[ss+s],res);
}
res = psi(ss+Ls-1);
// Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2
coalescedWrite(chi[ss], psi(ss));
tmp2 = pMooeeInvDag_shift_lc[0]*psi(ss);
for(int s=1; s<Ls; s++){
spProj5m(tmp1, chi(ss+s-1));
coalescedWrite(chi[ss+s],psi(ss+s) - puee[s-1]*tmp1);
tmp2 = tmp2 + pMooeeInvDag_shift_lc[s]*psi(ss+s);
}
tmp_spProj += pMooeeInvDag_shift_lc[Ls-1]*res;
if(pm == 1){ spProj5p(tmp_spProj, tmp_spProj); }
else { spProj5m(tmp_spProj, tmp_spProj); }
if(pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else { spProj5m(tmp2_spProj, tmp2);}
res = res - puee[Ls-2]*tmp - acc;
// U_m^{-\dag}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp1, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pueem[s]*tmp1);
}
// L_m^{-\dag} D^{-dag}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pleem[s]/pdee[Ls-1])*tmp1);
}
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
spProj5p(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) + pMooeeInvDag_shift_norm[Ls-1]*tmp2_spProj);
// Apply L^{-dag}
for(int s=Ls-2; s>=0; s--){
coalescedWrite(chi[ss+s], chi(ss+s) - plee[s]*tmp1);
spProj5p(tmp1, chi(ss+s));
coalescedWrite(chi[ss+s], chi(ss+s) + pMooeeInvDag_shift_norm[s]*tmp2_spProj);
}
// Apply L_m^{-\dagger} D^{-dagger} L^{-dagger}
res = (1.0/pdee[Ls-1])*res;
spProj5m(acc,res);
spProj5p(tmp,res);
coalescedWrite(chi[ss+Ls-1], res + pMooeeInvDag_shift_norm[Ls-1]*tmp_spProj);
for (int s=Ls-2;s>=0;s--){
res = (1.0/pdee[s])*chi(ss+s) - plee[s]*tmp - pleem[s]*acc;
spProj5p(tmp,res);
coalescedWrite(chi[ss+s], res + pMooeeInvDag_shift_norm[s]*tmp_spProj);
}
});
this->MooeeInvTime += usecond();

View File

@ -31,7 +31,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid);
template<class Impl>
template<class Impl>
void PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
// this does both dag and undag but is trivial; make a common helper routing
int Ls = this->Ls;
@ -45,8 +45,25 @@ void PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionFiel
ag5xpby_ssp(chi, scale,chi,0.0,chi,s+1,s+1);
}
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
}
// Multi-direction version of Mdir.
// DhopDirAll fills every entry of `chi`; each entry is then rescaled in place,
// slice by slice, through ag5xpby_ssp:
//   - slices are taken in pairs (s, s+1) for s = 0, 2, ..., 2*nblock-2 and
//     scaled by -scale and +scale respectively,
//   - the final slice Ls-1 is scaled by p[nblock]*scale/amax.
template<class Impl>
void PartialFractionFermion5D<Impl>::MdirAll (const FermionField &psi, std::vector<FermionField> &chi){
  // this does both dag and undag but is trivial; make a common helper routine
  const int Ls     = this->Ls;
  const int nblock = (Ls-1)/2;   // number of (s, s+1) slice pairs

  this->DhopDirAll(psi,chi);

  for (auto &leg : chi) {
    for (int s = 0; s < 2*nblock; s += 2) {
      ag5xpby_ssp(leg,-scale,leg,0.0,leg,s,s);
      ag5xpby_ssp(leg, scale,leg,0.0,leg,s+1,s+1);
    }
    ag5xpby_ssp(leg,p[nblock]*scale/amax,leg,0.0,leg,Ls-1,Ls-1);
  }
}
template<class Impl>
void PartialFractionFermion5D<Impl>::Meooe_internal(const FermionField &psi, FermionField &chi,int dag)
{

View File

@ -241,6 +241,15 @@ void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,in
Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out,dirdisp,gamma);
};
// Run the stencil halo exchange for `in` once (non-daggered compressor), then
// hand the stencil, gauge field and comms buffer to Kernels::DhopDirAll, which
// writes the per-direction results into `out`.
template<class Impl>
void WilsonFermion5D<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out)
{
  Compressor halo_compressor(DaggerNo);
  Stencil.HaloExchange(in, halo_compressor);

  // Site count is taken from the gauge field's grid.
  uint64_t n_gauge_sites = Umu.Grid()->oSites();

  Kernels::DhopDirAll(Stencil, Umu, Stencil.CommBuf(), Ls, n_gauge_sites, in, out);
};
template<class Impl>
void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,

View File

@ -319,28 +319,51 @@ void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int d
}
template <class Impl>
void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) {
void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)
{
DhopDir(in, out, dir, disp);
}
// MdirAll for the Wilson action is a direct forward to DhopDirAll: `in` is the
// source field and `out` receives the per-direction results.
template <class Impl>
void WilsonFermion<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)
{
  this->DhopDirAll(in, out);
}
template <class Impl>
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)
{
Compressor compressor(DaggerNo);
Stencil.HaloExchange(in, compressor);
int skip = (disp == 1) ? 0 : 1;
int dirdisp = dir + skip * 4;
int gamma = dir + (1 - skip) * 4;
DhopDirDisp(in, out, dirdisp, gamma, DaggerNo);
DhopDirCalc(in, out, dirdisp, gamma, DaggerNo);
};
template <class Impl>
void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag)
void WilsonFermion<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out)
{
Compressor compressor(dag);
Compressor compressor(DaggerNo);
Stencil.HaloExchange(in, compressor);
assert((out.size()==8)||(out.size()==9));
for(int dir=0;dir<Nd;dir++){
for(int disp=-1;disp<=1;disp+=2){
int skip = (disp == 1) ? 0 : 1;
int dirdisp = dir + skip * 4;
int gamma = dir + (1 - skip) * 4;
DhopDirCalc(in, out[dirdisp], dirdisp, gamma, DaggerNo);
}
}
}
template <class Impl>
void WilsonFermion<Impl>::DhopDirCalc(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag)
{
int Ls=1;
int Nsite=in.oSites();
uint64_t Nsite=in.oSites();
Kernels::DhopDirKernel(Stencil, Umu, Stencil.CommBuf(), Ls, Nsite, in, out, dirdisp, gamma);
};
@ -348,7 +371,8 @@ template <class Impl>
void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out, int dag) {
FermionField &out, int dag)
{
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
DhopInternalOverlappedComms(st,lo,U,in,out,dag);

View File

@ -91,8 +91,7 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip)
} \
synchronise();
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
if (gamma == Dir) { \
#define GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon) \
if (SE->_is_local ) { \
int perm= SE->_permute; \
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
@ -102,10 +101,14 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip)
} \
synchronise(); \
Impl::multLink(Uchi, U[sU], chi, dir, SE, st); \
Recon(result, Uchi); \
synchronise(); \
Recon(result, Uchi);
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
if (gamma == Dir) { \
GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon); \
}
////////////////////////////////////////////////////////////////////
// All legs kernels ; comms then compute
////////////////////////////////////////////////////////////////////
@ -284,7 +287,36 @@ void WilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st, DoubledGaugeField
}
};
template <class Impl>
////////////////////////////////////////////////////////////////////
// DhopDirMacro(Dir,spProj,spRecon)
//
// Generates the definition of WilsonKernels<Impl>::DhopDir<Dir>, a per-site
// routine for a single direction leg. The generated function:
//   1. looks up the stencil entry for leg `dir` at site sF (st.GetEntry),
//   2. expands GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,spRecon) -- unlike
//      GENERIC_DHOPDIR_LEG there is no `if (gamma == Dir)` test here, the
//      direction is fixed by the macro arguments,
//   3. stores `result` to out[sF] with coalescedWrite.
// `result` is only declared here; it is expected to be filled by the expanded
// leg body.
// Comments cannot be placed inside the macro body because of the line
// continuations, hence this header.
////////////////////////////////////////////////////////////////////
#define DhopDirMacro(Dir,spProj,spRecon) \
template <class Impl> \
void WilsonKernels<Impl>::DhopDir##Dir(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF, \
int sU, const FermionFieldView &in, FermionFieldView &out, int dir) \
{ \
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor; \
typedef decltype(coalescedRead(in[0])) calcSpinor; \
calcHalfSpinor chi; \
calcSpinor result; \
calcHalfSpinor Uchi; \
StencilEntry *SE; \
int ptype; \
const int Nsimd = SiteHalfSpinor::Nsimd(); \
const int lane=SIMTlane(Nsimd); \
\
SE = st.GetEntry(ptype, dir, sF); \
GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,spRecon); \
coalescedWrite(out[sF], result,lane); \
}
// One generated function per direction leg: DhopDirXp ... DhopDirTp and
// DhopDirXm ... DhopDirTm, each paired with its matching projector and
// reconstructor.
DhopDirMacro(Xp,spProjXp,spReconXp);
DhopDirMacro(Yp,spProjYp,spReconYp);
DhopDirMacro(Zp,spProjZp,spReconZp);
DhopDirMacro(Tp,spProjTp,spReconTp);
DhopDirMacro(Xm,spProjXm,spReconXm);
DhopDirMacro(Ym,spProjYm,spReconYm);
DhopDirMacro(Zm,spProjZm,spReconZm);
DhopDirMacro(Tm,spProjTm,spReconTm);
template <class Impl>
void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)
{
@ -299,18 +331,7 @@ void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si
const int lane=SIMTlane(Nsimd);
SE = st.GetEntry(ptype, dir, sF);
if (gamma == Xp) {
if (SE->_is_local ) {
int perm= SE->_permute;
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);
spProjXp(chi,tmp);
} else {
chi = coalescedRead(buf[SE->_offset],lane);
}
Impl::multLink(Uchi, U[sU], chi, dir, SE, st);
spReconXp(result, Uchi);
}
GENERIC_DHOPDIR_LEG(Xp,spProjXp,spReconXp);
GENERIC_DHOPDIR_LEG(Yp,spProjYp,spReconYp);
GENERIC_DHOPDIR_LEG(Zp,spProjZp,spReconZp);
GENERIC_DHOPDIR_LEG(Tp,spProjTp,spReconTp);
@ -321,6 +342,38 @@ void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si
coalescedWrite(out[sF], result,lane);
}
// Apply all eight single-leg kernels in one sweep over the lattice.
//
//   st     stencil; its View() and CommBuf() are handed to every leg kernel
//   U      doubled gauge field
//   buf    not referenced in this body (st.CommBuf() is passed to the kernels instead)
//   Ls     extent of the inner index: site index sss maps to sU = sss/Ls
//   Nsite  number of outer sites; the loop runs over Nsite*Ls
//   in     input fermion field
//   out    must hold at least 8 fields: out[0..3] receive the Xm,Ym,Zm,Tm legs
//          (dir 0..3) and out[4..7] the Xp,Yp,Zp,Tp legs (dir 4..7)
//
// NOTE(review): the loop is launched with accelerator_forNB, presumably the
// non-blocking variant - confirm that callers synchronise before reading `out`.
template <class Impl>
void WilsonKernels<Impl>::DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
int Nsite, const FermionField &in, std::vector<FermionField> &out)
{
// Views of the inputs
auto U_v = U.View();
auto in_v = in.View();
auto st_v = st.View();
// One output view per leg, in the order Xm,Ym,Zm,Tm,Xp,Yp,Zp,Tp
auto out_Xm = out[0].View();
auto out_Ym = out[1].View();
auto out_Zm = out[2].View();
auto out_Tm = out[3].View();
auto out_Xp = out[4].View();
auto out_Yp = out[5].View();
auto out_Zp = out[6].View();
auto out_Tp = out[7].View();
accelerator_forNB(sss,Nsite*Ls,Simd::Nsimd(),{
int sU=sss/Ls;
int sF =sss;
DhopDirXm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Xm,0);
DhopDirYm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Ym,1);
DhopDirZm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Zm,2);
DhopDirTm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Tm,3);
DhopDirXp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Xp,4);
DhopDirYp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Yp,5);
DhopDirZp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Zp,6);
DhopDirTp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Tp,7);
});
}
template <class Impl>
void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma)
@ -332,13 +385,32 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
auto in_v = in.View();
auto out_v = out.View();
auto st_v = st.View();
accelerator_for(ss,Nsite,Simd::Nsimd(),{
for(int s=0;s<Ls;s++){
int sU=ss;
int sF = s+Ls*sU;
DhopDirK(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_v,dirdisp,gamma);
}
});
#define LoopBody(Dir) \
case Dir : \
accelerator_forNB(ss,Nsite,Simd::Nsimd(),{ \
for(int s=0;s<Ls;s++){ \
int sU=ss; \
int sF = s+Ls*sU; \
DhopDir##Dir(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_v,dirdisp);\
} \
}); \
break;
switch(gamma){
LoopBody(Xp);
LoopBody(Yp);
LoopBody(Zp);
LoopBody(Tp);
LoopBody(Xm);
LoopBody(Ym);
LoopBody(Zm);
LoopBody(Tm);
default:
assert(0);
break;
}
#undef LoopBody
}
#define KERNEL_CALLNB(A) \

View File

@ -26,7 +26,7 @@ See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <Grid/Grid.h>
NAMESPACE_BEGIN(Grid);

View File

@ -26,7 +26,7 @@ See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <Grid/Grid.h>
#include <Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h>
NAMESPACE_BEGIN(Grid);

View File

@ -26,7 +26,7 @@ See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <Grid/Grid.h>
#include <Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h>
NAMESPACE_BEGIN(Grid);

View File

@ -26,7 +26,7 @@ See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <Grid/Grid.h>
#include <Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h>
NAMESPACE_BEGIN(Grid);

View File

@ -97,7 +97,6 @@ protected:
////////////////////////////////////
// Classes for the user
////////////////////////////////////
// Note: the space time grid should be out of the QCD namespace
template <class vector_type>
class GridFourDimModule : public GridModule
{

View File

@ -1,5 +1,34 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/smearing/StoutSmearing.h
Copyright (C) 2019
Author: unknown
Author: Felix Erben <ferben@ed.ac.uk>
Author: Michael Marshall <Michael.Marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/*
@file stoutSmear.hpp
@file StoutSmearing.h
@brief Declares Stout smearing class
*/
#pragma once
@ -9,19 +38,43 @@ NAMESPACE_BEGIN(Grid);
/*! @brief Stout smearing of link variable. */
template <class Gimpl>
class Smear_Stout : public Smear<Gimpl> {
private:
const Smear<Gimpl>* SmearBase;
private:
int OrthogDim = -1;
const std::vector<double> SmearRho;
// Smear<Gimpl>* ownership semantics:
// Smear<Gimpl>* passed in to constructor are owned by caller, so we don't delete them here
// Smear<Gimpl>* created within constructor need to be deleted as part of the destructor
const std::unique_ptr<Smear<Gimpl>> OwnedBase; // deleted at destruction
const Smear<Gimpl>* SmearBase; // Not owned by this object, so not deleted at destruction
/*! Build the flattened Nd x Nd rho matrix (entry (mu,nu) stored at mu + Nd*nu).
    Diagonal entries and every entry whose row or column is orthogdim are 0.0;
    all other entries are rho. With orthogdim outside [0,Nd) only the diagonal is zeroed.
    Only anticipated to be used from the default constructor. */
inline static std::vector<double> rho3D(double rho, int orthogdim){
  // Start from an all-zero matrix and fill in only the smeared planes
  std::vector<double> weights(Nd*Nd, 0.0);
  for (int nu=0; nu<Nd; nu++) {
    if (nu == orthogdim) continue;
    for (int mu=0; mu<Nd; mu++) {
      if (mu == nu || mu == orthogdim) continue;
      weights[mu + Nd * nu] = rho;
    }
  }
  return weights;
};
public:
INHERIT_GIMPL_TYPES(Gimpl)
Smear_Stout(Smear<Gimpl>* base) : SmearBase(base) {
assert(Nc == 3);// "Stout smearing currently implemented only for Nc==3");
/*! Stout smearing with base explicitly specified */
Smear_Stout(Smear<Gimpl>* base) : SmearBase{base} {
assert(Nc == 3 && "Stout smearing currently implemented only for Nc==3");
}
/*! Default constructor */
Smear_Stout(double rho = 1.0) : SmearBase(new Smear_APE<Gimpl>(rho)) {
assert(Nc == 3);// "Stout smearing currently implemented only for Nc==3");
/*! Construct stout smearing object from explicitly specified rho matrix.
    A Smear_APE<Gimpl> base is created from rho_ and owned by this object (OwnedBase);
    SmearBase points at that owned instance.
    @param rho_ rho matrix handed unchanged to Smear_APE<Gimpl> */
Smear_Stout(const std::vector<double>& rho_)
: OwnedBase{new Smear_APE<Gimpl>(rho_)}, SmearBase{OwnedBase.get()} {
// The logging statement was missing its terminating ';', which made the constructor ill-formed
std::cout << GridLogDebug << "Stout smearing constructor : Smear_Stout(const std::vector<double>& " << rho_ << " )" << std::endl;
assert(Nc == 3 && "Stout smearing currently implemented only for Nc==3");
}
/*! Default constructor. rho is constant in all directions, optionally except for orthogonal dimension */
Smear_Stout(double rho = 1.0, int orthogdim = -1)
: OrthogDim{orthogdim}, SmearRho{ rho3D(rho,orthogdim) }, OwnedBase{ new Smear_APE<Gimpl>(SmearRho) }, SmearBase{OwnedBase.get()} {
assert(Nc == 3 && "Stout smearing currently implemented only for Nc==3");
}
~Smear_Stout() {} // Nothing to delete by hand: a base created by a constructor is released by OwnedBase (unique_ptr); a caller-supplied SmearBase is not owned
@ -36,12 +89,16 @@ public:
SmearBase->smear(C, U);
for (int mu = 0; mu < Nd; mu++) {
tmp = peekLorentz(C, mu);
Umu = peekLorentz(U, mu);
iq_mu = Ta(
tmp *
adj(Umu)); // iq_mu = Ta(Omega_mu) to match the signs with the paper
exponentiate_iQ(tmp, iq_mu);
if( mu == OrthogDim )
tmp = 1.0; // Don't smear in the orthogonal direction
else {
tmp = peekLorentz(C, mu);
Umu = peekLorentz(U, mu);
iq_mu = Ta(
tmp *
adj(Umu)); // iq_mu = Ta(Omega_mu) to match the signs with the paper
exponentiate_iQ(tmp, iq_mu);
}
pokeLorentz(u_smr, tmp * Umu, mu); // u_smr = exp(iQ_mu)*U_mu
}
std::cout << GridLogDebug << "Stout smearing completed\n";
@ -80,6 +137,7 @@ public:
iQ2 = iQ * iQ;
iQ3 = iQ * iQ2;
//We should check sgn(c0) here already and then apply eq (34) from 0311018
set_uw(u, w, iQ2, iQ3);
set_fj(f0, f1, f2, u, w);
@ -139,9 +197,8 @@ public:
}
LatticeComplex func_xi0(const LatticeComplex& w) const {
// Define a function to do the check
// if( w < 1e-4 ) std::cout << GridLogWarning<< "[Smear_stout] w too small:
// "<< w <<"\n";
// Definition from arxiv 0311018
//if (abs(w) < 0.05) {w2 = w*w; return 1.0 - w2/6.0 * (1.0-w2/20.0 * (1.0-w2/42.0));}
return sin(w) / w;
}
@ -154,4 +211,3 @@ public:
};
NAMESPACE_END(Grid);

View File

@ -67,8 +67,21 @@ public:
const std::vector<ComplexField> &emB1,
int orthogdim, double *t_kernel = nullptr, double *t_gsum = nullptr);
static void ContractWWVV(std::vector<PropagatorField> &WWVV,
const Eigen::Tensor<ComplexD,3> &WW_sd,
template <typename TensorType>
typename std::enable_if<(std::is_same<Eigen::Tensor<ComplexD,3>, TensorType>::value ||
std::is_same<Eigen::TensorMap<Eigen::Tensor<Complex, 3, Eigen::RowMajor>>, TensorType>::value),
void>::type
static ContractWWVV(std::vector<PropagatorField> &WWVV,
const TensorType &WW_sd,
const FermionField *vs,
const FermionField *vd);
template <typename TensorType>
typename std::enable_if<!(std::is_same<Eigen::Tensor<ComplexD,3>, TensorType>::value ||
std::is_same<Eigen::TensorMap<Eigen::Tensor<Complex, 3, Eigen::RowMajor>>, TensorType>::value),
void>::type
static ContractWWVV(std::vector<PropagatorField> &WWVV,
const TensorType &WW_sd,
const FermionField *vs,
const FermionField *vd);
@ -98,6 +111,11 @@ public:
const FermionField *vd,
int orthogdim);
#endif
private:
inline static void OuterProductWWVV(PropagatorField &WWVV,
const vobj &lhs,
const vobj &rhs,
const int Ns, const int ss);
};
template <class FImpl>
@ -242,7 +260,7 @@ void A2Autils<FImpl>::MesonField(TensorType &mat,
int ij_dx = m+Nmom*i + Nmom*Lblock * j + Nmom*Lblock * Rblock * lt;
for(int mu=0;mu<Ngamma;mu++){
// this is a bit slow
mat(m,mu,t,i,j) = trace(lsSum[ij_dx]*Gamma(gammas[mu]));
mat(m,mu,t,i,j) = trace(lsSum[ij_dx]*Gamma(gammas[mu]))()()();
}
}
}
@ -968,9 +986,13 @@ void A2Autils<FImpl>::AslashField(TensorType &mat,
// Take WW_sd v^dag_d (x) v_s
//
template<class FImpl>
void A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
const Eigen::Tensor<ComplexD,3> &WW_sd,
template <class FImpl>
template <typename TensorType>
typename std::enable_if<(std::is_same<Eigen::Tensor<ComplexD,3>, TensorType>::value ||
std::is_same<Eigen::TensorMap<Eigen::Tensor<Complex, 3, Eigen::RowMajor>>, TensorType>::value),
void>::type
A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
const TensorType &WW_sd,
const FermionField *vs,
const FermionField *vd)
{
@ -992,39 +1014,100 @@ void A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
for(int d_o=0;d_o<N_d;d_o+=d_unroll){
for(int t=0;t<N_t;t++){
for(int s=0;s<N_s;s++){
auto vs_v = vs[s].View();
auto tmp1 = vs_v[ss];
vobj tmp2 = Zero();
vobj tmp3 = Zero();
for(int d=d_o;d<MIN(d_o+d_unroll,N_d);d++){
auto vd_v = vd[d].View();
Scalar_v coeff = WW_sd(t,s,d);
tmp3 = conjugate(vd_v[ss]);
mac(&tmp2, &coeff, &tmp3);
}
auto vs_v = vs[s].View();
auto tmp1 = vs_v[ss];
vobj tmp2 = Zero();
vobj tmp3 = Zero();
for(int d=d_o;d<MIN(d_o+d_unroll,N_d);d++){
auto vd_v = vd[d].View();
Scalar_v coeff = WW_sd(t,s,d);
tmp3 = conjugate(vd_v[ss]);
mac(&tmp2, &coeff, &tmp3);
}
//////////////////////////
// Fast outer product of tmp1 with a sum of terms suppressed by d_unroll
//////////////////////////
auto WWVV_v = WWVV[t].View();
for(int s1=0;s1<Ns;s1++){
for(int s2=0;s2<Ns;s2++){
WWVV_v[ss]()(s1,s2)(0,0) += tmp1()(s1)(0)*tmp2()(s2)(0);
WWVV_v[ss]()(s1,s2)(0,1) += tmp1()(s1)(0)*tmp2()(s2)(1);
WWVV_v[ss]()(s1,s2)(0,2) += tmp1()(s1)(0)*tmp2()(s2)(2);
WWVV_v[ss]()(s1,s2)(1,0) += tmp1()(s1)(1)*tmp2()(s2)(0);
WWVV_v[ss]()(s1,s2)(1,1) += tmp1()(s1)(1)*tmp2()(s2)(1);
WWVV_v[ss]()(s1,s2)(1,2) += tmp1()(s1)(1)*tmp2()(s2)(2);
WWVV_v[ss]()(s1,s2)(2,0) += tmp1()(s1)(2)*tmp2()(s2)(0);
WWVV_v[ss]()(s1,s2)(2,1) += tmp1()(s1)(2)*tmp2()(s2)(1);
WWVV_v[ss]()(s1,s2)(2,2) += tmp1()(s1)(2)*tmp2()(s2)(2);
}}
//////////////////////////
// Fast outer product of tmp1 with a sum of terms suppressed by d_unroll
//////////////////////////
OuterProductWWVV(WWVV[t], tmp1, tmp2, Ns, ss);
}}
}
});
}
////////////////////////////////////////////////////////////////////////////
// ContractWWVV overload for every TensorType other than
// Eigen::Tensor<ComplexD,3> and the row-major Eigen::TensorMap of Complex.
//
// For each t the slice WW_sd[t] is first copied into a row-major Eigen
// matrix, then for every outer site ss
//     WWVV[t][ss] += sum_{s,d} vs[s][ss] (x) ( buf(s,d) * conjugate(vd[d][ss]) )
// where the d sum is accumulated in chunks of d_unroll before each outer product.
//
//   WWVV   output; entries 0 .. N_t-1 are zeroed, then accumulated into
//   WW_sd  must provide dimensions()[0..2] = (N_t, N_s, N_d) and operator[](t)
//          assignable to an Eigen row-major matrix indexed as (s,d)
//   vs     array of at least N_s fields (vs[0] also supplies the grid)
//   vd     array of at least N_d fields
////////////////////////////////////////////////////////////////////////////
template <class FImpl>
template <typename TensorType>
typename std::enable_if<!(std::is_same<Eigen::Tensor<ComplexD, 3>, TensorType>::value ||
std::is_same<Eigen::TensorMap<Eigen::Tensor<Complex, 3, Eigen::RowMajor>>, TensorType>::value),
void>::type
A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
const TensorType &WW_sd,
const FermionField *vs,
const FermionField *vd)
{
GridBase *grid = vs[0].Grid();
int N_t = WW_sd.dimensions()[0];
int N_s = WW_sd.dimensions()[1];
int N_d = WW_sd.dimensions()[2];
int d_unroll = 32;// Empirical optimisation
Eigen::Matrix<Complex, -1, -1, Eigen::RowMajor> buf;
for(int t=0;t<N_t;t++){
WWVV[t] = Zero();
}
for (int t = 0; t < N_t; t++){
std::cout << GridLogMessage << "Contraction t = " << t << std::endl;
// Copy the (s,d) slice for this t once, outside the threaded site loop
buf = WW_sd[t];
thread_for(ss,grid->oSites(),{
for(int d_o=0;d_o<N_d;d_o+=d_unroll){
for(int s=0;s<N_s;s++){
auto vs_v = vs[s].View();
auto tmp1 = vs_v[ss];
vobj tmp2 = Zero();
vobj tmp3 = Zero();
// tmp2 = sum over this d chunk of buf(s,d) * conjugate(vd[d])
for(int d=d_o;d<MIN(d_o+d_unroll,N_d);d++){
auto vd_v = vd[d].View();
Scalar_v coeff = buf(s,d);
tmp3 = conjugate(vd_v[ss]);
mac(&tmp2, &coeff, &tmp3);
}
//////////////////////////
// Fast outer product of tmp1 with a sum of terms suppressed by d_unroll
//////////////////////////
OuterProductWWVV(WWVV[t], tmp1, tmp2, Ns, ss);
}}
});
}
}
// Accumulate the spin-colour outer product of two site vectors into site ss of WWVV:
//   WWVV[ss](s1,s2)(c1,c2) += lhs(s1)(c1) * rhs(s2)(c2)
// for s1,s2 in [0,Ns) and the three colour components c1,c2 in {0,1,2}.
template <class FImpl>
inline void A2Autils<FImpl>::OuterProductWWVV(PropagatorField &WWVV,
const vobj &lhs,
const vobj &rhs,
const int Ns, const int ss)
{
  const int ncolour = 3; // colour extent is fixed at three here
  auto WWVV_v = WWVV.View();
  for (int spinL = 0; spinL < Ns; spinL++){
  for (int spinR = 0; spinR < Ns; spinR++){
    for (int colL = 0; colL < ncolour; colL++){
    for (int colR = 0; colR < ncolour; colR++){
      WWVV_v[ss]()(spinL,spinR)(colL, colR) += lhs()(spinL)(colL) * rhs()(spinR)(colR);
    }}
  }}
}
template<class FImpl>
void A2Autils<FImpl>::ContractFourQuarkColourDiagonal(const PropagatorField &WWVV0,

View File

@ -0,0 +1,624 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/utils/BaryonUtils.h
Copyright (C) 2019
Author: Felix Erben <felix.erben@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
//#include <Grid/Hadrons/Global.hpp>
#include <Grid/Eigen/unsupported/CXX11/Tensor>
NAMESPACE_BEGIN(Grid);
// Static helpers for baryon contractions: baryon two-point functions
// (ContractBaryons*) and Sigma -> N contractions (Sigma_to_Nucleon_*).
// The private *_site members are per-site kernels taking site objects; the
// public members are the entry points.
template <typename FImpl>
class BaryonUtils
{
public:
typedef typename FImpl::ComplexField ComplexField;
typedef typename FImpl::FermionField FermionField;
typedef typename FImpl::PropagatorField PropagatorField;
typedef typename FImpl::SitePropagator pobj;
typedef typename ComplexField::vector_object vobj;
typedef Lattice<iSpinMatrix<typename FImpl::Simd>> SpinMatrixField;
typedef typename SpinMatrixField::vector_object sobj;
// The six permutations of the three colour indices (0,1,2) ...
static const int epsilon[6][3] ;
// ... and the sign (+1 / -1) attached to each of them
static const Complex epsilon_sgn[6];
private:
// Per-site baryon two-point contraction; accumulates into result.
// wick_contractions points to six 0/1 flags selecting which terms are included.
template <class mobj, class robj>
static void baryon_site(const mobj &D1,
const mobj &D2,
const mobj &D3,
const Gamma GammaA_left,
const Gamma GammaB_left,
const Gamma GammaA_right,
const Gamma GammaB_right,
const int parity,
const int * wick_contractions,
robj &result);
public:
// Lattice-wide baryon two-point contraction, written to baryon_corr.
// quarks_left / quarks_right are read at indices 0..2; parity must be +1 or -1.
static void ContractBaryons(const PropagatorField &q1_left,
const PropagatorField &q2_left,
const PropagatorField &q3_left,
const Gamma GammaA_left,
const Gamma GammaB_left,
const Gamma GammaA_right,
const Gamma GammaB_right,
const char * quarks_left,
const char * quarks_right,
const int parity,
ComplexField &baryon_corr);
// Same contraction for a single set of site objects D1,D2,D3; result is overwritten.
template <class mobj, class robj>
static void ContractBaryons_Sliced(const mobj &D1,
const mobj &D2,
const mobj &D3,
const Gamma GammaA_left,
const Gamma GammaB_left,
const Gamma GammaA_right,
const Gamma GammaB_right,
const char * quarks_left,
const char * quarks_right,
const int parity,
robj &result);
private:
// Per-site Sigma -> N kernels (Q1 / Q2, Eye / NonEye); each accumulates into result.
template <class mobj, class mobj2, class robj>
static void Sigma_to_Nucleon_Q1_Eye_site(const mobj &Dq_loop,
const mobj2 &Du_spec,
const mobj &Dd_tf,
const mobj &Ds_ti,
const Gamma Gamma_H,
const Gamma GammaB_sigma,
const Gamma GammaB_nucl,
robj &result);
template <class mobj, class mobj2, class robj>
static void Sigma_to_Nucleon_Q1_NonEye_site(const mobj &Du_ti,
const mobj &Du_tf,
const mobj2 &Du_spec,
const mobj &Dd_tf,
const mobj &Ds_ti,
const Gamma Gamma_H,
const Gamma GammaB_sigma,
const Gamma GammaB_nucl,
robj &result);
template <class mobj, class mobj2, class robj>
static void Sigma_to_Nucleon_Q2_Eye_site(const mobj &Dq_loop,
const mobj2 &Du_spec,
const mobj &Dd_tf,
const mobj &Ds_ti,
const Gamma Gamma_H,
const Gamma GammaB_sigma,
const Gamma GammaB_nucl,
robj &result);
template <class mobj, class mobj2, class robj>
static void Sigma_to_Nucleon_Q2_NonEye_site(const mobj &Du_ti,
const mobj &Du_tf,
const mobj2 &Du_spec,
const mobj &Dd_tf,
const mobj &Ds_ti,
const Gamma Gamma_H,
const Gamma GammaB_sigma,
const Gamma GammaB_nucl,
robj &result);
public:
// Lattice-wide Sigma -> N entry points; the result field is stn_corr.
// NOTE(review): `op` presumably selects between the Q1 and Q2 kernels - confirm in the definitions.
template <class mobj>
static void Sigma_to_Nucleon_Eye(const PropagatorField &qq_loop,
const mobj &Du_spec,
const PropagatorField &qd_tf,
const PropagatorField &qs_ti,
const Gamma Gamma_H,
const Gamma GammaB_sigma,
const Gamma GammaB_nucl,
const std::string op,
SpinMatrixField &stn_corr);
template <class mobj>
static void Sigma_to_Nucleon_NonEye(const PropagatorField &qq_ti,
const PropagatorField &qq_tf,
const mobj &Du_spec,
const PropagatorField &qd_tf,
const PropagatorField &qs_ti,
const Gamma Gamma_H,
const Gamma GammaB_sigma,
const Gamma GammaB_nucl,
const std::string op,
SpinMatrixField &stn_corr);
};
// The six permutations of (0,1,2): the first three rows are the cyclic (even)
// ones, the last three the odd ones.
template <class FImpl>
const int BaryonUtils<FImpl>::epsilon[6][3] = {
  {0,1,2},
  {1,2,0},
  {2,0,1},
  {0,2,1},
  {2,1,0},
  {1,0,2}
};
// Sign of each permutation above, in the same order: +1 for rows 0-2, -1 for rows 3-5.
template <class FImpl>
const Complex BaryonUtils<FImpl>::epsilon_sgn[6] = {
  Complex(1),  Complex(1),  Complex(1),
  Complex(-1), Complex(-1), Complex(-1)
};
// Per-site baryon two-point contraction.
//
//   D1, D2, D3         site propagators of the three quarks
//   GammaA_*, GammaB_* gamma structures of the left / right interpolators
//   parity             enters the projector 0.5*(1 + parity*gamma_4) applied to D1
//   wick_contraction   six 0/1 flags, one per term below; a term is skipped when its flag is 0
//   result             accumulated into (+= / -=); the caller must zero it beforehand
//
// The products with GammaB_left do not depend on the colour permutations, so they
// are formed once up front instead of once per (ie_left, ie_right) pair.
template <class FImpl>
template <class mobj, class robj>
void BaryonUtils<FImpl>::baryon_site(const mobj &D1,
const mobj &D2,
const mobj &D3,
const Gamma GammaA_left,
const Gamma GammaB_left,
const Gamma GammaA_right,
const Gamma GammaB_right,
const int parity,
const int * wick_contraction,
robj &result)
{
Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4)
auto gD1a = GammaA_left * GammaA_right * D1;
auto gD1b = GammaA_left * g4 * GammaA_right * D1;
auto pD1 = 0.5* (gD1a + static_cast<double>(parity) * gD1b);
auto gD3 = GammaB_right * D3;
// Loop-invariant products with GammaB_left
auto D2g = D2 * GammaB_left;
auto pD1g = pD1 * GammaB_left;
auto gD3g = gD3 * GammaB_left;
for (int ie_left=0; ie_left < 6 ; ie_left++){
int a_left = epsilon[ie_left][0]; //a
int b_left = epsilon[ie_left][1]; //b
int c_left = epsilon[ie_left][2]; //c
for (int ie_right=0; ie_right < 6 ; ie_right++){
int a_right = epsilon[ie_right][0]; //a'
int b_right = epsilon[ie_right][1]; //b'
int c_right = epsilon[ie_right][2]; //c'
// Combined sign of the two colour permutations
const auto ee = epsilon_sgn[ie_left] * epsilon_sgn[ie_right];
//This is the \delta_{456}^{123} part
if (wick_contraction[0]){
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
for (int beta_left=0; beta_left<Ns; beta_left++){
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
result()()() += ee * pD1()(gamma_left,gamma_left)(c_right,c_left)*D2g()(alpha_right,beta_left)(a_right,a_left)*gD3()(alpha_right,beta_left)(b_right,b_left);
}}}
}
//This is the \delta_{456}^{231} part
if (wick_contraction[1]){
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
for (int beta_left=0; beta_left<Ns; beta_left++){
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
result()()() += ee * pD1g()(gamma_left,beta_left)(c_right,a_left)*D2()(alpha_right,beta_left)(a_right,b_left)*gD3()(alpha_right,gamma_left)(b_right,c_left);
}}}
}
//This is the \delta_{456}^{312} part
if (wick_contraction[2]){
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
for (int beta_left=0; beta_left<Ns; beta_left++){
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
result()()() += ee * pD1()(gamma_left,beta_left)(c_right,b_left)*D2()(alpha_right,gamma_left)(a_right,c_left)*gD3g()(alpha_right,beta_left)(b_right,a_left);
}}}
}
//This is the \delta_{456}^{132} part
if (wick_contraction[3]){
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
for (int beta_left=0; beta_left<Ns; beta_left++){
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
result()()() -= ee * pD1()(gamma_left,gamma_left)(c_right,c_left)*D2()(alpha_right,beta_left)(a_right,b_left)*gD3g()(alpha_right,beta_left)(b_right,a_left);
}}}
}
//This is the \delta_{456}^{321} part
if (wick_contraction[4]){
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
for (int beta_left=0; beta_left<Ns; beta_left++){
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
result()()() -= ee * pD1()(gamma_left,beta_left)(c_right,b_left)*D2g()(alpha_right,beta_left)(a_right,a_left)*gD3()(alpha_right,gamma_left)(b_right,c_left);
}}}
}
//This is the \delta_{456}^{213} part
if (wick_contraction[5]){
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
for (int beta_left=0; beta_left<Ns; beta_left++){
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
result()()() -= ee * pD1g()(gamma_left,beta_left)(c_right,a_left)*D2()(alpha_right,gamma_left)(a_right,c_left)*gD3()(alpha_right,beta_left)(b_right,b_left);
}}}
}
}
}
}
// Lattice-wide baryon two-point contraction.
//
//   q1_left, q2_left, q3_left  propagator fields of the three quarks
//   GammaA_*, GammaB_*         gamma structures, forwarded to baryon_site
//   quarks_left, quarks_right  flavour labels; indices 0..2 are read, so each must
//                              point to at least three characters
//   parity                     must be +1 or -1 (asserted)
//   baryon_corr                output: one contraction result per outer site
//
// A Wick term ie is enabled when quarks_left matches quarks_right permuted by epsilon[ie].
template<class FImpl>
void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
const PropagatorField &q2_left,
const PropagatorField &q3_left,
const Gamma GammaA_left,
const Gamma GammaB_left,
const Gamma GammaA_right,
const Gamma GammaB_right,
const char * quarks_left,
const char * quarks_right,
const int parity,
ComplexField &baryon_corr)
{
std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl;
std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl;
std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl;
std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl;
std::cout << "GammaB (right) " << (GammaB_right.g) << std::endl;
// Parenthesised: '&&' binds tighter than '||', so without the brackets the
// message string was attached to the second comparison only.
assert((parity==1 || parity == -1) && "Parity must be +1 or -1");
GridBase *grid = q1_left.Grid();
// Flag each of the six permutations of the right-hand flavours that reproduces the left-hand ones
int wick_contraction[6];
for (int ie=0; ie < 6 ; ie++)
wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0;
auto vbaryon_corr= baryon_corr.View();
auto v1 = q1_left.View();
auto v2 = q2_left.View();
auto v3 = q3_left.View();
// accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
thread_for(ss,grid->oSites(),{
//for(int ss=0; ss < grid->oSites(); ss++){
auto D1 = v1[ss];
auto D2 = v2[ss];
auto D3 = v3[ss];
// baryon_site accumulates, so start every site from zero
vobj result=Zero();
baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result);
vbaryon_corr[ss] = result;
} );//end loop over lattice sites
}
// Baryon two-point contraction for a single set of site objects.
//
//   D1, D2, D3                 propagator objects of the three quarks
//   GammaA_*, GammaB_*         gamma structures, forwarded to baryon_site
//   quarks_left, quarks_right  flavour labels; indices 0..2 are read, so each must
//                              point to at least three characters
//   parity                     must be +1 or -1 (asserted)
//   result                     output: zeroed here, then filled by baryon_site
template <class FImpl>
template <class mobj, class robj>
void BaryonUtils<FImpl>::ContractBaryons_Sliced(const mobj &D1,
const mobj &D2,
const mobj &D3,
const Gamma GammaA_left,
const Gamma GammaB_left,
const Gamma GammaA_right,
const Gamma GammaB_right,
const char * quarks_left,
const char * quarks_right,
const int parity,
robj &result)
{
std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl;
std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl;
std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl;
std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl;
std::cout << "GammaB (right) " << (GammaB_right.g) << std::endl;
// Parenthesised: '&&' binds tighter than '||', so without the brackets the
// message string was attached to the second comparison only.
assert((parity==1 || parity == -1) && "Parity must be +1 or -1");
// Flag each of the six permutations of the right-hand flavours that reproduces the left-hand ones
int wick_contraction[6];
for (int ie=0; ie < 6 ; ie++)
wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0;
result=Zero();
baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result);
}
/***********************************************************************
* End of Baryon 2pt-function code. *
* *
* The following code is for Sigma -> N rare hyperon decays *
**********************************************************************/
/* Per-site Q1 "Eye" contraction for Sigma -> N.
* Dq_loop is a quark line from t_H to t_H
* Du_spec is a quark line from t_i to t_f
* Dd_tf is a quark line from t_f to t_H
* Ds_ti is a quark line from t_i to t_H
* result is a spin matrix that is only accumulated into (+= / -=);
* it is not cleared here, so the caller must initialise it. */
template <class FImpl>
template <class mobj, class mobj2, class robj>
void BaryonUtils<FImpl>::Sigma_to_Nucleon_Q1_Eye_site(const mobj &Dq_loop,
const mobj2 &Du_spec,
const mobj &Dd_tf,
const mobj &Ds_ti,
const Gamma Gamma_H,
const Gamma GammaB_sigma,
const Gamma GammaB_nucl,
robj &result)
{
Gamma g5(Gamma::Algebra::Gamma5);
// Du_spec * Gamma^B (nucleon)
auto DuG = Du_spec * GammaB_nucl;
// Gamma^B * Ds * \gamma_\mu^L * (\gamma_5 * Dd^\dagger * \gamma_5)
auto GDsGDd = GammaB_sigma * Ds_ti * Gamma_H * g5 * adj(Dd_tf) * g5;
// Dq_loop * \gamma_\mu^L
auto DqG = Dq_loop * Gamma_H;
// Sum over the six colour permutations on the nucleon side (a,b,c) ...
for (int ie_n=0; ie_n < 6 ; ie_n++){
int a_n = epsilon[ie_n][0]; //a
int b_n = epsilon[ie_n][1]; //b
int c_n = epsilon[ie_n][2]; //c
// ... and on the sigma side (a',b',c')
for (int ie_s=0; ie_s < 6 ; ie_s++){
int a_s = epsilon[ie_s][0]; //a'
int b_s = epsilon[ie_s][1]; //b'
int c_s = epsilon[ie_s][2]; //c'
for (int alpha_s=0; alpha_s<Ns; alpha_s++){
for (int beta_n=0; beta_n<Ns; beta_n++){
auto GDsGDd_ab_bb = GDsGDd()(alpha_s,beta_n)(b_s,b_n);
// Diagonal spin-colour elements of the loop: summing them builds its trace
for (int tau2=0; tau2<Ns; tau2++){
for (int j=0; j<Nc; j++){
auto DqG_tt_jj = DqG()(tau2,tau2)(j,j);
// Factor common to both terms below: permutation signs times the two fixed elements
auto ee_GDGDDG = epsilon_sgn[ie_n] * epsilon_sgn[ie_s] * GDsGDd_ab_bb * DqG_tt_jj;
for (int gamma_s=0; gamma_s<Ns; gamma_s++){
for (int gamma_n=0; gamma_n<Ns; gamma_n++){
result()(gamma_s,gamma_n)() += ee_GDGDDG * DuG()(alpha_s, beta_n)(a_s,a_n) * Du_spec()(gamma_s,gamma_n)(c_s,c_n);
result()(gamma_s,gamma_n)() -= ee_GDGDDG * DuG()(gamma_s, beta_n)(c_s,a_n) * Du_spec()(alpha_s,gamma_n)(a_s,c_n);
}}
}}
}}
}
}
}
/* Per-site Q1 "NonEye" contraction for Sigma -> N.
* Du_ti is a quark line from t_i to t_H
* Du_tf is a quark line from t_f to t_H
* Du_spec is a quark line from t_i to t_f
* Dd_tf is a quark line from t_f to t_H
* Ds_ti is a quark line from t_i to t_H
* result is a spin matrix that is only accumulated into (+= / -=);
* it is not cleared here, so the caller must initialise it. */
template <class FImpl>
template <class mobj, class mobj2, class robj>
void BaryonUtils<FImpl>::Sigma_to_Nucleon_Q1_NonEye_site(const mobj &Du_ti,
const mobj &Du_tf,
const mobj2 &Du_spec,
const mobj &Dd_tf,
const mobj &Ds_ti,
const Gamma Gamma_H,
const Gamma GammaB_sigma,
const Gamma GammaB_nucl,
robj &result)
{
Gamma g5(Gamma::Algebra::Gamma5);
// Du_spec * Gamma^B (nucleon)
auto DuG = Du_spec * GammaB_nucl;
// \gamma_5 * Du_tf^\dagger * \gamma_5, and the same times Gamma^B (nucleon)
auto adjDu = g5 * adj(Du_tf) * g5;
auto adjDuG = adjDu * GammaB_nucl;
// Gamma^B * Ds * \gamma_\mu^L * (\gamma_5 * Dd^\dagger * \gamma_5)
auto GDsGDd = GammaB_sigma * Ds_ti * Gamma_H * g5 * adj(Dd_tf) * g5;
// Du_ti * \gamma_\mu^L
auto DuGH = Du_ti * Gamma_H;
// Sum over the six colour permutations on the nucleon side (a,b,c) ...
for (int ie_n=0; ie_n < 6 ; ie_n++){
int a_n = epsilon[ie_n][0]; //a
int b_n = epsilon[ie_n][1]; //b
int c_n = epsilon[ie_n][2]; //c
// ... and on the sigma side (a',b',c')
for (int ie_s=0; ie_s < 6 ; ie_s++){
int a_s = epsilon[ie_s][0]; //a'
int b_s = epsilon[ie_s][1]; //b'
int c_s = epsilon[ie_s][2]; //c'
for (int alpha_s=0; alpha_s<Ns; alpha_s++){
for (int beta_n=0; beta_n<Ns; beta_n++){
auto GDsGDd_ab_bb = GDsGDd()(alpha_s,beta_n)(b_s,b_n);
for (int tau2=0; tau2<Ns; tau2++){
for (int j=0; j<Nc; j++){
// Prefactor built from the (alpha_s, a_s) row of DuGH
auto DuGH_at_aj = DuGH()(alpha_s,tau2)(a_s,j);
auto ee_GDGDDG_a = epsilon_sgn[ie_n] * epsilon_sgn[ie_s] * GDsGDd_ab_bb * DuGH_at_aj;
for (int gamma_s=0; gamma_s<Ns; gamma_s++){
// Prefactor built from the (gamma_s, c_s) row of DuGH
auto DuGH_gt_cj = DuGH()(gamma_s,tau2)(c_s,j);
auto ee_GDGDDG_c = epsilon_sgn[ie_n] * epsilon_sgn[ie_s] * GDsGDd_ab_bb * DuGH_gt_cj;
for (int gamma_n=0; gamma_n<Ns; gamma_n++){
result()(gamma_s,gamma_n)() += ee_GDGDDG_a * DuG()(gamma_s, beta_n)(c_s,a_n) * adjDu()(tau2,gamma_n)(j,c_n);
result()(gamma_s,gamma_n)() += ee_GDGDDG_c * adjDuG()(tau2, beta_n)(j,a_n) * Du_spec()(alpha_s,gamma_n)(a_s,c_n);
result()(gamma_s,gamma_n)() -= ee_GDGDDG_a * adjDuG()(tau2, beta_n)(j,a_n) * Du_spec()(gamma_s,gamma_n)(c_s,c_n);
result()(gamma_s,gamma_n)() -= ee_GDGDDG_c * DuG()(alpha_s, beta_n)(a_s,a_n) * adjDu()(tau2,gamma_n)(j,c_n);
}
}
}}
}}
}
}
}
//Equivalent to "One-trace"
/* Per-site Q2 "Eye" contraction for Sigma -> N.
* Dq_loop is a quark line from t_H to t_H
* Du_spec is a quark line from t_i to t_f
* Dd_tf is a quark line from t_f to t_H
* Ds_ti is a quark line from t_i to t_H
* result is a spin matrix that is only accumulated into (+= / -=);
* it is not cleared here, so the caller must initialise it. */
template <class FImpl>
template <class mobj, class mobj2, class robj>
void BaryonUtils<FImpl>::Sigma_to_Nucleon_Q2_Eye_site(const mobj &Dq_loop,
const mobj2 &Du_spec,
const mobj &Dd_tf,
const mobj &Ds_ti,
const Gamma Gamma_H,
const Gamma GammaB_sigma,
const Gamma GammaB_nucl,
robj &result)
{
Gamma g5(Gamma::Algebra::Gamma5);
// Du_spec * Gamma^B (nucleon)
auto DuG = Du_spec * GammaB_nucl;
// Gamma^B * Ds * \gamma_\mu^L
auto GDsG = GammaB_sigma * Ds_ti * Gamma_H;
// Dq_loop * \gamma_\mu^L * (\gamma_5 * Dd^\dagger * \gamma_5)
auto DqGDd = Dq_loop * Gamma_H * g5 * adj(Dd_tf) * g5;
// Sum over the six colour permutations on the nucleon side (a,b,c) ...
for (int ie_n=0; ie_n < 6 ; ie_n++){
int a_n = epsilon[ie_n][0]; //a
int b_n = epsilon[ie_n][1]; //b
int c_n = epsilon[ie_n][2]; //c
// ... and on the sigma side (a',b',c')
for (int ie_s=0; ie_s < 6 ; ie_s++){
int a_s = epsilon[ie_s][0]; //a'
int b_s = epsilon[ie_s][1]; //b'
int c_s = epsilon[ie_s][2]; //c'
for (int alpha_s=0; alpha_s<Ns; alpha_s++){
// (tau, i) is the spin-colour index shared by GDsG and DqGDd
for (int tau=0; tau<Ns; tau++){
for (int i=0; i<Nc; i++){
auto GDsG_at_bi = GDsG()(alpha_s,tau)(b_s,i);
for (int beta_n=0; beta_n<Ns; beta_n++){
auto DqGDd_tb_ib = DqGDd()(tau,beta_n)(i,b_n);
// Factor common to both terms below: permutation signs times the two fixed elements
auto ee_GDGDGD = epsilon_sgn[ie_n] * epsilon_sgn[ie_s] * GDsG_at_bi * DqGDd_tb_ib;
for (int gamma_s=0; gamma_s<Ns; gamma_s++){
for (int gamma_n=0; gamma_n<Ns; gamma_n++){
result()(gamma_s,gamma_n)() -= ee_GDGDGD * DuG()(alpha_s, beta_n)(a_s,a_n) * Du_spec()(gamma_s,gamma_n)(c_s,c_n);
result()(gamma_s,gamma_n)() += ee_GDGDGD * DuG()(gamma_s, beta_n)(c_s,a_n) * Du_spec()(alpha_s,gamma_n)(a_s,c_n);
}}
}
}}}
}
}
}
/* Per-site Q2 "NonEye" contraction for Sigma -> N.
* Du_ti is a quark line from t_i to t_H
* Du_tf is a quark line from t_f to t_H
* Du_spec is a quark line from t_i to t_f
* Dd_tf is a quark line from t_f to t_H
* Ds_ti is a quark line from t_i to t_H
* result is a spin matrix that is only accumulated into (+= / -=);
* it is not cleared here, so the caller must initialise it. */
template <class FImpl>
template <class mobj, class mobj2, class robj>
void BaryonUtils<FImpl>::Sigma_to_Nucleon_Q2_NonEye_site(const mobj &Du_ti,
const mobj &Du_tf,
const mobj2 &Du_spec,
const mobj &Dd_tf,
const mobj &Ds_ti,
const Gamma Gamma_H,
const Gamma GammaB_sigma,
const Gamma GammaB_nucl,
robj &result)
{
Gamma g5(Gamma::Algebra::Gamma5);
// Du_spec * Gamma^B (nucleon)
auto DuG = Du_spec * GammaB_nucl;
// \gamma_5 * Du_tf^\dagger * \gamma_5, and the same times Gamma^B (nucleon)
auto adjDu = g5 * adj(Du_tf) * g5;
auto adjDuG = adjDu * GammaB_nucl;
// Gamma^B * Ds * \gamma_\mu^L
auto GDsG = GammaB_sigma * Ds_ti * Gamma_H;
// Du * \gamma_\mu^L * (\gamma_5 * Dd^\dagger * \gamma_5)
auto DuGDd = Du_ti * Gamma_H * g5 * adj(Dd_tf) * g5;
// Sum over the six colour permutations on the nucleon side (a,b,c) ...
for (int ie_n=0; ie_n < 6 ; ie_n++){
int a_n = epsilon[ie_n][0]; //a
int b_n = epsilon[ie_n][1]; //b
int c_n = epsilon[ie_n][2]; //c
// ... and on the sigma side (a',b',c')
for (int ie_s=0; ie_s < 6 ; ie_s++){
int a_s = epsilon[ie_s][0]; //a'
int b_s = epsilon[ie_s][1]; //b'
int c_s = epsilon[ie_s][2]; //c'
for (int alpha_s=0; alpha_s<Ns; alpha_s++){
for (int tau=0; tau<Ns; tau++){
for (int i=0; i<Nc; i++){
auto GDsG_at_bi = GDsG()(alpha_s,tau)(b_s,i);
for (int beta_n=0; beta_n<Ns; beta_n++){
// Prefactor built from the (alpha_s, a_s) row of DuGDd
auto DuGDd_ab_ab = DuGDd()(alpha_s,beta_n)(a_s,b_n);
auto ee_GDGDGD_a = epsilon_sgn[ie_n] * epsilon_sgn[ie_s] * GDsG_at_bi * DuGDd_ab_ab;
for (int gamma_s=0; gamma_s<Ns; gamma_s++){
// Prefactor built from the (gamma_s, c_s) row of DuGDd
auto DuGDd_gb_cb = DuGDd()(gamma_s,beta_n)(c_s,b_n);
auto ee_GDGDGD_c = epsilon_sgn[ie_n] * epsilon_sgn[ie_s] * GDsG_at_bi * DuGDd_gb_cb;
for (int gamma_n=0; gamma_n<Ns; gamma_n++){
result()(gamma_s,gamma_n)() -= ee_GDGDGD_a * DuG()(gamma_s, beta_n)(c_s,a_n) * adjDu()(tau,gamma_n)(i,c_n);
result()(gamma_s,gamma_n)() -= ee_GDGDGD_c * adjDuG()(tau, beta_n)(i,a_n) * Du_spec()(alpha_s,gamma_n)(a_s,c_n);
result()(gamma_s,gamma_n)() += ee_GDGDGD_a * adjDuG()(tau, beta_n)(i,a_n) * Du_spec()(gamma_s,gamma_n)(c_s,c_n);
result()(gamma_s,gamma_n)() += ee_GDGDGD_c * DuG()(alpha_s, beta_n)(a_s,a_n) * adjDu()(tau,gamma_n)(i,c_n);
}
}
}
}}}
}
}
}
/* Sigma -> Nucleon eye-type contraction, evaluated on every outer site.
 *
 * qq_loop, qd_tf, qs_ti : propagator fields, read site by site
 * Du_spec               : single (site-independent) spectator propagator object
 * Gamma_H, GammaB_sigma, GammaB_nucl : gamma structures forwarded unchanged
 *                         to the per-site contraction
 * op                    : "Q1" or "Q2", selects the per-site routine
 * stn_corr              : output; every site is overwritten
 *
 * Any other value of op trips the assert. If asserts are compiled out, every
 * site of stn_corr is set to zero, as before. */
template<class FImpl>
template <class mobj>
void BaryonUtils<FImpl>::Sigma_to_Nucleon_Eye(const PropagatorField &qq_loop,
						 const mobj &Du_spec,
						 const PropagatorField &qd_tf,
						 const PropagatorField &qs_ti,
						 const Gamma Gamma_H,
						 const Gamma GammaB_sigma,
						 const Gamma GammaB_nucl,
						 const std::string op,
						 SpinMatrixField &stn_corr)
{
  // The operator choice is the same for all sites: decode and validate it once
  // rather than comparing strings for every site inside the threaded loop.
  const bool useQ1 = (op == "Q1");
  const bool useQ2 = (op == "Q2");
  assert((useQ1 || useQ2) && "Weak Operator not correctly specified");

  GridBase *grid = qs_ti.Grid();

  auto vcorr= stn_corr.View();
  auto vq_loop = qq_loop.View();
  auto vd_tf = qd_tf.View();
  auto vs_ti = qs_ti.View();

  // accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
  thread_for(ss,grid->oSites(),{
    auto Dq_loop = vq_loop[ss];
    auto Dd_tf = vd_tf[ss];
    auto Ds_ti = vs_ti[ss];
    sobj result=Zero();
    if(useQ1){
      Sigma_to_Nucleon_Q1_Eye_site(Dq_loop,Du_spec,Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
    } else if(useQ2){
      Sigma_to_Nucleon_Q2_Eye_site(Dq_loop,Du_spec,Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
    }
    vcorr[ss] = result;
  } );//end loop over lattice sites
}
/* Sigma -> Nucleon non-eye-type contraction, evaluated on every outer site.
 *
 * qq_ti, qq_tf, qd_tf, qs_ti : propagator fields, read site by site
 * Du_spec               : single (site-independent) spectator propagator object
 * Gamma_H, GammaB_sigma, GammaB_nucl : gamma structures forwarded unchanged
 *                         to the per-site contraction
 * op                    : "Q1" or "Q2", selects the per-site routine
 * stn_corr              : output; every site is overwritten
 *
 * Any other value of op trips the assert. If asserts are compiled out, every
 * site of stn_corr is set to zero, as before. */
template<class FImpl>
template <class mobj>
void BaryonUtils<FImpl>::Sigma_to_Nucleon_NonEye(const PropagatorField &qq_ti,
						 const PropagatorField &qq_tf,
						 const mobj &Du_spec,
						 const PropagatorField &qd_tf,
						 const PropagatorField &qs_ti,
						 const Gamma Gamma_H,
						 const Gamma GammaB_sigma,
						 const Gamma GammaB_nucl,
						 const std::string op,
						 SpinMatrixField &stn_corr)
{
  // The operator choice is the same for all sites: decode and validate it once
  // rather than comparing strings for every site inside the threaded loop.
  const bool useQ1 = (op == "Q1");
  const bool useQ2 = (op == "Q2");
  assert((useQ1 || useQ2) && "Weak Operator not correctly specified");

  GridBase *grid = qs_ti.Grid();

  auto vcorr= stn_corr.View();
  auto vq_ti = qq_ti.View();
  auto vq_tf = qq_tf.View();
  auto vd_tf = qd_tf.View();
  auto vs_ti = qs_ti.View();

  // accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
  thread_for(ss,grid->oSites(),{
    auto Dq_ti = vq_ti[ss];
    auto Dq_tf = vq_tf[ss];
    auto Dd_tf = vd_tf[ss];
    auto Ds_ti = vs_ti[ss];
    sobj result=Zero();
    if(useQ1){
      Sigma_to_Nucleon_Q1_NonEye_site(Dq_ti,Dq_tf,Du_spec,Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
    } else if(useQ2){
      Sigma_to_Nucleon_Q2_NonEye_site(Dq_ti,Dq_tf,Du_spec,Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
    }
    vcorr[ss] = result;
  } );//end loop over lattice sites
}
NAMESPACE_END(Grid);

View File

@ -92,6 +92,7 @@ public:
};
void Mdir(const GaugeField&, GaugeField&, int, int){ assert(0);}
void MdirAll(const GaugeField&, std::vector<GaugeField> &){ assert(0);}
void Mdiag(const GaugeField&, GaugeField&){ assert(0);}
void ImportGauge(const GaugeField& _U) {

View File

@ -27,8 +27,7 @@ directory
*************************************************************************************/
#pragma once
namespace Grid {
namespace QCD {
NAMESPACE_BEGIN(Grid);
template <class Gimpl> class CovariantSmearing : public Gimpl
{
@ -84,4 +83,5 @@ public:
}
}
};
}}
NAMESPACE_END(Grid);

View File

@ -201,7 +201,6 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
});
}
// I explicitly need these outside the QCD namespace
template<typename vobj>
void G5C(Lattice<vobj> &z, const Lattice<vobj> &x)
{

View File

@ -403,6 +403,10 @@ namespace Optimization {
accelerator_inline GpuVectorRD operator()(GpuVectorRD a, GpuVectorRD b){
return a/b;
}
accelerator_inline GpuVectorI operator()(GpuVectorI a, GpuVectorI b){
return a/b;
}
// Danger -- element wise divide for complex, not complex div.
// See Grid_vector_types.h lines around 735, applied after "toReal"
accelerator_inline GpuVectorCF operator()(GpuVectorCF a, GpuVectorCF b){

View File

@ -1233,7 +1233,7 @@ public:
};
void Report(void) {
#define AVERAGE(A) _grid->GlobalSum(A);A/=NP;
#define AVERAGE(A)
#define PRINTIT(A) AVERAGE(A); std::cout << GridLogMessage << " Stencil " << #A << " "<< A/calls<<std::endl;
RealD NP = _grid->_Nprocessors;
RealD NN = _grid->NodeCount();
@ -1281,11 +1281,13 @@ public:
std::cout << GridLogMessage << " Stencil SHM mem " << (membytes)/gatheralltime/1000. << " GB/s per rank"<<std::endl;
std::cout << GridLogMessage << " Stencil SHM mem " << (membytes)/gatheralltime/1000.*NP/NN << " GB/s per node"<<std::endl;
}
/*
PRINTIT(mpi3synctime);
PRINTIT(mpi3synctime_g);
PRINTIT(shmmergetime);
PRINTIT(splicetime);
PRINTIT(nosplicetime);
*/
}
#undef PRINTIT
#undef AVERAGE

View File

@ -60,166 +60,243 @@ template<class l,int N> accelerator_inline iMatrix<l,N> operator * (const typena
////////////////////////////////////////////////////////////////////
// Double support; cast to "scalar_type" through constructor
////////////////////////////////////////////////////////////////////
template<class l> accelerator_inline iScalar<l> operator * (const iScalar<l>& lhs,double rhs)
template<class l,IfNotSame<typename iScalar<l>::scalar_type,double> = 0>
accelerator_inline iScalar<l> operator * (const iScalar<l>& lhs,double rhs)
{
// typename std::enable_if<!std::is_same<typename iScalar<l>::scalar_type,double>::value,int>::type i=0;
typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs;
}
template<class l> accelerator_inline iScalar<l> operator * (double lhs,const iScalar<l>& rhs) { return rhs*lhs; }
template<class l,IfNotSame<typename iScalar<l>::scalar_type,double> = 0>
accelerator_inline iScalar<l> operator * (double lhs,const iScalar<l>& rhs)
{
// typename std::enable_if<!std::is_same<typename iScalar<l>::scalar_type,double>::value,int>::type i=0;
return rhs*lhs;
}
template<class l,int N> accelerator_inline iVector<l,N> operator * (const iVector<l,N>& lhs,double rhs)
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,double> = 0>
accelerator_inline iVector<l,N> operator * (const iVector<l,N>& lhs,double rhs)
{
// typename std::enable_if<!std::is_same<typename iScalar<l>::scalar_type,double>::value,int>::type i=0;
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs;
}
template<class l,int N> accelerator_inline iVector<l,N> operator * (double lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,double> = 0>
accelerator_inline iVector<l,N> operator * (double lhs,const iVector<l,N>& rhs)
{
// typename std::enable_if<!std::is_same<typename iScalar<l>::scalar_type,double>::value,int>::type i=0;
return rhs*lhs;
}
template<class l,int N> accelerator_inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,double rhs)
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,double> = 0>
accelerator_inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,double rhs)
{
// typename std::enable_if<!std::is_same<typename iScalar<l>::scalar_type,double>::value,int>::type i=0;
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs;
}
template<class l,int N> accelerator_inline iMatrix<l,N> operator * (double lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,double> = 0>
accelerator_inline iMatrix<l,N> operator * (double lhs,const iMatrix<l,N>& rhs)
{
// typename std::enable_if<!std::is_same<typename iScalar<l>::scalar_type,double>::value,int>::type i=0;
return rhs*lhs;
}
////////////////////////////////////////////////////////////////////
// Complex support; cast to "scalar_type" through constructor
////////////////////////////////////////////////////////////////////
template<class l> accelerator_inline iScalar<l> operator * (const iScalar<l>& lhs,ComplexD rhs)
template<class l,IfNotSame<typename iScalar<l>::scalar_type,ComplexD> = 0>
accelerator_inline iScalar<l> operator * (const iScalar<l>& lhs,ComplexD rhs)
{
// typename std::enable_if<!std::is_same<typename iScalar<l>::scalar_type,ComplexD>::value,int>::type i=0;
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs;
}
template<class l> accelerator_inline iScalar<l> operator * (ComplexD lhs,const iScalar<l>& rhs) { return rhs*lhs; }
template<class l,int N> accelerator_inline iVector<l,N> operator * (const iVector<l,N>& lhs,ComplexD rhs)
template<class l,IfNotSame<typename iScalar<l>::scalar_type,ComplexD> = 0>
accelerator_inline iScalar<l> operator * (ComplexD lhs,const iScalar<l>& rhs)
{
// typename std::enable_if<!std::is_same<typename iScalar<l>::scalar_type,ComplexD>::value,int>::type i=0;
return rhs*lhs;
}
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,ComplexD> = 0>
accelerator_inline iVector<l,N> operator * (const iVector<l,N>& lhs,ComplexD rhs)
{
// typename std::enable_if<!std::is_same<typename iScalar<l>::scalar_type,ComplexD>::value,int>::type i=0;
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs;
}
template<class l,int N> accelerator_inline iVector<l,N> operator * (ComplexD lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
template<class l,int N> accelerator_inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,ComplexD rhs)
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,ComplexD> = 0>
accelerator_inline iVector<l,N> operator * (ComplexD lhs,const iVector<l,N>& rhs)
{
// typename std::enable_if<!std::is_same<typename iScalar<l>::scalar_type,ComplexD>::value,int>::type i=0;
return rhs*lhs;
}
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,ComplexD> = 0>
accelerator_inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,ComplexD rhs)
{
// typename std::enable_if<!std::is_same<typename iScalar<l>::scalar_type,ComplexD>::value,int>::type i=0;
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs;
}
template<class l,int N> accelerator_inline iMatrix<l,N> operator * (ComplexD lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,ComplexD> = 0>
accelerator_inline iMatrix<l,N> operator * (ComplexD lhs,const iMatrix<l,N>& rhs)
{
// typename std::enable_if<!std::is_same<typename iScalar<l>::scalar_type,ComplexD>::value,int>::type i=0;
return rhs*lhs;
}
////////////////////////////////////////////////////////////////////
// Integer support; cast to "scalar_type" through constructor
////////////////////////////////////////////////////////////////////
template<class l> accelerator_inline iScalar<l> operator * (const iScalar<l>& lhs,Integer rhs)
// iScalar * Integer: the Integer is first converted to the tensor's
// scalar_type, then wrapped in tensor_reduced so the existing
// tensor * tensor_reduced product does the work.
template<class l,IfNotSame<typename iScalar<l>::scalar_type,Integer> = 0>
accelerator_inline iScalar<l> operator * (const iScalar<l>& lhs,Integer rhs)
{
// Older in-body form of the guard, kept for reference (the type compared against is Integer here):
// typename std::enable_if<!std::is_same<typename iScalar<l>::scalar_type,Integer>::value,int>::type i=0;
typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs*srhs;
}
template<class l> accelerator_inline iScalar<l> operator * (Integer lhs,const iScalar<l>& rhs) { return rhs*lhs; }
template<class l,IfNotSame<typename iScalar<l>::scalar_type,Integer> = 0>
accelerator_inline iScalar<l> operator * (Integer lhs,const iScalar<l>& rhs)
{
// typename std::enable_if<!std::is_same<typename iScalar<l>::scalar_type,Integer>::value,int>::type i=0;
return rhs*lhs;
}
template<class l,int N> accelerator_inline iVector<l,N> operator * (const iVector<l,N>& lhs,Integer rhs)
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,Integer> = 0>
accelerator_inline iVector<l,N> operator * (const iVector<l,N>& lhs,Integer rhs)
{
// typename std::enable_if<!std::is_same<typename iScalar<l>::scalar_type,Integer>::value,int>::type i=0;
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs;
}
template<class l,int N> accelerator_inline iVector<l,N> operator * (Integer lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
template<class l,int N> accelerator_inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,Integer rhs)
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,Integer> = 0>
accelerator_inline iVector<l,N> operator * (Integer lhs,const iVector<l,N>& rhs)
{
// typename std::enable_if<!std::is_same<typename iScalar<l>::scalar_type,Integer>::value,int>::type i=0;
return rhs*lhs;
}
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,Integer> = 0>
accelerator_inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,Integer rhs)
{
// typename std::enable_if<!std::is_same<typename iScalar<l>::scalar_type,Integer>::value,int>::type i=0;
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs;
}
template<class l,int N> accelerator_inline iMatrix<l,N> operator * (Integer lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,Integer> = 0>
accelerator_inline iMatrix<l,N> operator * (Integer lhs,const iMatrix<l,N>& rhs)
{
// typename std::enable_if<!std::is_same<typename iScalar<l>::scalar_type,Integer>::value,int>::type i=0;
return rhs*lhs;
}
///////////////////////////////////////////////////////////////////////////////////////////////
// addition by fundamental scalar type applies to matrix(down diag) and scalar
///////////////////////////////////////////////////////////////////////////////////////////////
template<class l> accelerator_inline iScalar<l> operator + (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
template<class l>
accelerator_inline iScalar<l> operator + (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
{
typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
return lhs+srhs;
}
template<class l> accelerator_inline iScalar<l> operator + (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs+lhs; }
template<class l>
accelerator_inline iScalar<l> operator + (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs+lhs; }
template<class l,int N> accelerator_inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
template<class l,int N>
accelerator_inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
{
typename iMatrix<l,N>::tensor_reduced srhs; srhs=rhs;
return lhs+srhs;
}
template<class l,int N> accelerator_inline iMatrix<l,N> operator + (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
template<class l,int N>
accelerator_inline iMatrix<l,N> operator + (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
////////////////////////////////////////////////////////////////////
// Double support; cast to "scalar_type" through constructor
////////////////////////////////////////////////////////////////////
template<class l> accelerator_inline iScalar<l> operator + (const iScalar<l>& lhs,double rhs)
template<class l,IfNotSame<typename iScalar<l>::scalar_type,double> = 0>
accelerator_inline iScalar<l> operator + (const iScalar<l>& lhs,double rhs)
{
typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs+srhs;
}
template<class l> accelerator_inline iScalar<l> operator + (double lhs,const iScalar<l>& rhs) { return rhs+lhs; }
template<class l,IfNotSame<typename iScalar<l>::scalar_type,double> = 0>
accelerator_inline iScalar<l> operator + (double lhs,const iScalar<l>& rhs) { return rhs+lhs; }
template<class l,int N> accelerator_inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,double rhs)
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,double> = 0>
accelerator_inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,double rhs)
{
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs+srhs;
}
template<class l,int N> accelerator_inline iMatrix<l,N> operator + (double lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,double> = 0>
accelerator_inline iMatrix<l,N> operator + (double lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
// Integer support cast to scalar type through constructor
template<class l> accelerator_inline iScalar<l> operator + (const iScalar<l>& lhs,Integer rhs)
// iScalar + Integer: convert the Integer to scalar_type, wrap it in
// tensor_reduced and reuse the tensor + tensor_reduced sum.
// The guard compares against Integer (it previously compared against double,
// copied from the double overloads). Assuming IfNotSame<A,B> is an enable_if
// on !is_same<A,B>, this removes the overload exactly when scalar_type is
// Integer, where it would duplicate the scalar_type overload. This matches
// the Integer overloads of operator* and operator-.
template<class l,IfNotSame<typename iScalar<l>::scalar_type,Integer> = 0>
accelerator_inline iScalar<l> operator + (const iScalar<l>& lhs,Integer rhs)
{
  typename iScalar<l>::scalar_type t; t=rhs;
  typename iScalar<l>::tensor_reduced srhs; srhs=t;
  return lhs+srhs;
}
template<class l> accelerator_inline iScalar<l> operator + (Integer lhs,const iScalar<l>& rhs) { return rhs+lhs; }
// Integer + iScalar: addition commutes, so forward to iScalar + Integer.
// The guard compares against Integer (previously double), so the overload is
// removed exactly when scalar_type is Integer; this matches the Integer
// overloads of operator* and operator-.
template<class l,IfNotSame<typename iScalar<l>::scalar_type,Integer> = 0>
accelerator_inline iScalar<l> operator + (Integer lhs,const iScalar<l>& rhs) { return rhs+lhs; }
template<class l,int N> accelerator_inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,Integer rhs)
// iMatrix + Integer: convert the Integer to scalar_type, wrap it in
// tensor_reduced and reuse the matrix + tensor_reduced sum (the section
// header describes scalar addition to a matrix as acting down the diagonal).
// The guard compares against Integer (previously double), so the overload is
// removed exactly when scalar_type is Integer; this matches the Integer
// overloads of operator* and operator-.
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,Integer> = 0>
accelerator_inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,Integer rhs)
{
  typename iScalar<l>::scalar_type t;t=rhs;
  typename iScalar<l>::tensor_reduced srhs;srhs=t;
  return lhs+srhs;
}
template<class l,int N> accelerator_inline iMatrix<l,N> operator + (Integer lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
// Integer + iMatrix: addition commutes, so forward to iMatrix + Integer.
// The guard compares against Integer (previously double), so the overload is
// removed exactly when scalar_type is Integer; this matches the Integer
// overloads of operator* and operator-.
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,Integer> = 0>
accelerator_inline iMatrix<l,N> operator + (Integer lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
///////////////////////////////////////////////////////////////////////////////////////////////
// subtraction of fundamental scalar type applies to matrix(down diag) and scalar
///////////////////////////////////////////////////////////////////////////////////////////////
template<class l> accelerator_inline iScalar<l> operator - (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
template<class l>
accelerator_inline iScalar<l> operator - (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
{
typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
return lhs-srhs;
}
template<class l> accelerator_inline iScalar<l> operator - (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs)
template<class l>
accelerator_inline iScalar<l> operator - (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs)
{
typename iScalar<l>::tensor_reduced slhs;slhs=lhs;
return slhs-rhs;
}
template<class l,int N> accelerator_inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
template<class l,int N>
accelerator_inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
{
typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
return lhs-srhs;
}
template<class l,int N> accelerator_inline iMatrix<l,N> operator - (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs)
template<class l,int N>
accelerator_inline iMatrix<l,N> operator - (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs)
{
typename iScalar<l>::tensor_reduced slhs;slhs=lhs;
return slhs-rhs;
@ -228,26 +305,30 @@ template<class l,int N> accelerator_inline iMatrix<l,N> operator - (const typena
////////////////////////////////////////////////////////////////////
// Double support; cast to "scalar_type" through constructor
////////////////////////////////////////////////////////////////////
template<class l> accelerator_inline iScalar<l> operator - (const iScalar<l>& lhs,double rhs)
template<class l,IfNotSame<typename iScalar<l>::scalar_type,double> = 0>
accelerator_inline iScalar<l> operator - (const iScalar<l>& lhs,double rhs)
{
typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs-srhs;
}
template<class l> accelerator_inline iScalar<l> operator - (double lhs,const iScalar<l>& rhs)
template<class l,IfNotSame<typename iScalar<l>::scalar_type,double> = 0>
accelerator_inline iScalar<l> operator - (double lhs,const iScalar<l>& rhs)
{
typename iScalar<l>::scalar_type t(lhs);
typename iScalar<l>::tensor_reduced slhs;slhs=t;
return slhs-rhs;
}
template<class l,int N> accelerator_inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,double rhs)
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,double> = 0>
accelerator_inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,double rhs)
{
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs-srhs;
}
template<class l,int N> accelerator_inline iMatrix<l,N> operator - (double lhs,const iMatrix<l,N>& rhs)
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,double> = 0>
accelerator_inline iMatrix<l,N> operator - (double lhs,const iMatrix<l,N>& rhs)
{
typename iScalar<l>::scalar_type t(lhs);
typename iScalar<l>::tensor_reduced slhs;slhs=t;
@ -257,25 +338,29 @@ template<class l,int N> accelerator_inline iMatrix<l,N> operator - (double lhs,c
////////////////////////////////////////////////////////////////////
// Integer support; cast to "scalar_type" through constructor
////////////////////////////////////////////////////////////////////
template<class l> accelerator_inline iScalar<l> operator - (const iScalar<l>& lhs,Integer rhs)
template<class l,IfNotSame<typename iScalar<l>::scalar_type,Integer> = 0>
accelerator_inline iScalar<l> operator - (const iScalar<l>& lhs,Integer rhs)
{
typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs-srhs;
}
template<class l> accelerator_inline iScalar<l> operator - (Integer lhs,const iScalar<l>& rhs)
template<class l,IfNotSame<typename iScalar<l>::scalar_type,Integer> = 0>
accelerator_inline iScalar<l> operator - (Integer lhs,const iScalar<l>& rhs)
{
typename iScalar<l>::scalar_type t;t=lhs;
typename iScalar<l>::tensor_reduced slhs;slhs=t;
return slhs-rhs;
}
template<class l,int N> accelerator_inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,Integer rhs)
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,Integer> = 0>
accelerator_inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,Integer rhs)
{
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs-srhs;
}
template<class l,int N> accelerator_inline iMatrix<l,N> operator - (Integer lhs,const iMatrix<l,N>& rhs)
template<class l,int N,IfNotSame<typename iScalar<l>::scalar_type,Integer> = 0>
accelerator_inline iMatrix<l,N> operator - (Integer lhs,const iMatrix<l,N>& rhs)
{
typename iScalar<l>::scalar_type t;t=lhs;
typename iScalar<l>::tensor_reduced slhs;slhs=t;

View File

@ -57,14 +57,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define thread_max(a) (1)
#endif
#define naked_for(i,num,...) for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
#define naked_foreach(i,container,...) for ( uint64_t i=container.begin();i<container.end();i++) { __VA_ARGS__ } ;
#define thread_for( i, num, ... ) DO_PRAGMA(omp parallel for schedule(static)) naked_for(i,num,{__VA_ARGS__});
#define thread_foreach( i, num, ... ) DO_PRAGMA(omp parallel for schedule(static)) naked_foreach(i,num,{__VA_ARGS__});
#define thread_for_in_region( i, num, ... ) DO_PRAGMA(omp for schedule(static)) naked_for(i,num,{__VA_ARGS__});
#define thread_for_collapse2( i, num, ... ) DO_PRAGMA(omp parallel for collapse(2)) naked_for(i,num,{__VA_ARGS__});
#define thread_for_collapse( N , i, num, ... ) DO_PRAGMA(omp parallel for collapse ( N ) ) naked_for(i,num,{__VA_ARGS__});
#define thread_for_collapse_in_region( N , i, num, ... ) DO_PRAGMA(omp for collapse ( N )) naked_for(i,num,{__VA_ARGS__});
#define thread_for( i, num, ... ) DO_PRAGMA(omp parallel for schedule(static)) for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
#define thread_foreach( i, container, ... ) DO_PRAGMA(omp parallel for schedule(static)) for ( uint64_t i=container.begin();i<container.end();i++) { __VA_ARGS__ } ;
#define thread_for_in_region( i, num, ... ) DO_PRAGMA(omp for schedule(static)) for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
#define thread_for_collapse2( i, num, ... ) DO_PRAGMA(omp parallel for collapse(2)) for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
#define thread_for_collapse( N , i, num, ... ) DO_PRAGMA(omp parallel for collapse ( N ) ) for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
#define thread_for_collapse_in_region( N , i, num, ... ) DO_PRAGMA(omp for collapse ( N )) for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
#define thread_region DO_PRAGMA(omp parallel)
#define thread_critical DO_PRAGMA(omp critical)

View File

@ -292,7 +292,7 @@ void GridGpuInit(void)
gpu_props = new cudaDeviceProp[nDevices];
char * localRankStr = NULL;
int rank = 0, device = 0, world_rank=0;
int rank = 0, world_rank=0;
#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK"
#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK"
#define ENV_RANK_OMPI "OMPI_COMM_WORLD_RANK"
@ -301,23 +301,16 @@ void GridGpuInit(void)
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)
{
rank = atoi(localRankStr);
device = rank %nDevices;
}
if ((localRankStr = getenv(ENV_LOCAL_RANK_MVAPICH)) != NULL)
{
rank = atoi(localRankStr);
device = rank %nDevices;
}
if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);}
if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);}
cudaSetDevice(device);
if ( world_rank == 0 ) {
GridBanner();
printf("GpuInit: ================================================\n");
printf("GpuInit: Setting up Cuda Device map before first MPI call\n",nDevices);
printf("GpuInit: ================================================\n");
printf("GpuInit: Cuda reports %d GPUs on MPI rank 0\n",nDevices);
}
for (int i = 0; i < nDevices; i++) {
@ -325,7 +318,6 @@ void GridGpuInit(void)
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("GpuInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
// cudaGetDeviceProperties(&prop, i);
cudaGetDeviceProperties(&gpu_props[i], i);
if ( world_rank == 0) {
cudaDeviceProp prop;
@ -334,15 +326,13 @@ void GridGpuInit(void)
printf("GpuInit: Device Number : %d\n", i);
printf("GpuInit: ========================\n");
printf("GpuInit: Device identifier: %s\n", prop.name);
// printf("GpuInit: Peak Memory Bandwidth (GB/s): %f\n",(float)2.0*prop.memoryClockRate*(prop.memoryBusWidth/8)/1.0e6);
GPU_PROP(managedMemory);
GPU_PROP(isMultiGpuBoard);
GPU_PROP(warpSize);
#if 0
GPU_PROP(unifiedAddressing);
GPU_PROP(l2CacheSize);
GPU_PROP(singleToDoublePrecisionPerfRatio);
#endif
// GPU_PROP(unifiedAddressing);
// GPU_PROP(l2CacheSize);
// GPU_PROP(singleToDoublePrecisionPerfRatio);
}
}
if ( world_rank == 0 ) {
@ -638,6 +628,7 @@ void Grid_debug_handler_init(void)
sigaction(SIGSEGV,&sa,NULL);
sigaction(SIGTRAP,&sa,NULL);
sigaction(SIGBUS,&sa,NULL);
sigaction(SIGUSR2,&sa,NULL);
feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);

View File

@ -31,7 +31,6 @@ directory
int main(int argc, char **argv) {
using namespace Grid;
using namespace Grid::QCD;
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
@ -44,18 +43,18 @@ int main(int argc, char **argv) {
typedef typename FermionAction::FermionField FermionField;
typedef Grid::XmlReader Serialiser;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
IntegratorParameters MD;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// MD.name = std::string("Leap Frog");
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
// MD.name = std::string("Force Gradient");
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
MD.name = std::string("MinimumNorm2");
MD.MDsteps = 20;
MD.trajL = 1.0;
HMCparameters HMCparams;
HMCparams.StartTrajectory = 0;
HMCparams.Trajectories = 200;
@ -67,7 +66,7 @@ int main(int argc, char **argv) {
// Grid from the command line arguments --grid and --mpi
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_EODWF_lat";
CPparams.rng_prefix = "ckpoint_EODWF_rng";
@ -81,7 +80,7 @@ int main(int argc, char **argv) {
TheHMC.Resources.SetRNGSeeds(RNGpar);
// Construct observables
// here there is too much indirection
// here there is too much indirection
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
TheHMC.Resources.AddObservable<PlaqObs>();
//////////////////////////////////////////////
@ -118,7 +117,7 @@ int main(int argc, char **argv) {
// These lines are unnecessary if BC are all periodic
std::vector<Complex> boundary = {1,1,1,-1};
FermionAction::ImplParams Params(boundary);
double StoppingCondition = 1e-10;
double MaxCGIterations = 30000;
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);

View File

@ -34,8 +34,7 @@ directory
#define MIXED_PRECISION
#endif
namespace Grid{
namespace QCD{
NAMESPACE_BEGIN(Grid);
/*
* Need a plan for gauge field update for mixed precision in HMC (2x speed up)
@ -163,11 +162,11 @@ namespace Grid{
MPCG(src,psi);
}
};
}};
NAMESPACE_END(Grid);
int main(int argc, char **argv) {
using namespace Grid;
using namespace Grid::QCD;
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();

View File

@ -34,8 +34,7 @@ directory
#define MIXED_PRECISION
#endif
namespace Grid{
namespace QCD{
NAMESPACE_BEGIN(Grid);
/*
* Need a plan for gauge field update for mixed precision in HMC (2x speed up)
@ -146,11 +145,12 @@ namespace Grid{
MPCG(src,psi);
}
};
}};
NAMESPACE_END(Grid);
int main(int argc, char **argv) {
using namespace Grid;
using namespace Grid::QCD;
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
@ -341,7 +341,7 @@ int main(int argc, char **argv) {
ActionCG,
ActionCG, ActionCG,
ActionCG, ActionCG,
// DerivativeCG, DerivativeCG,
// DerivativeCG, DerivativeCG,
OFRp, true);
#endif
Level1.push_back(&EOFA);

View File

@ -31,7 +31,6 @@ directory
int main(int argc, char **argv) {
using namespace Grid;
using namespace Grid::QCD;
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
@ -44,18 +43,18 @@ int main(int argc, char **argv) {
typedef typename FermionAction::FermionField FermionField;
typedef Grid::XmlReader Serialiser;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
IntegratorParameters MD;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// MD.name = std::string("Leap Frog");
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
// MD.name = std::string("Force Gradient");
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
MD.name = std::string("MinimumNorm2");
MD.MDsteps = 20;
MD.trajL = 1.0;
HMCparameters HMCparams;
HMCparams.StartTrajectory = 30;
HMCparams.Trajectories = 200;
@ -68,7 +67,7 @@ int main(int argc, char **argv) {
// Grid from the command line arguments --grid and --mpi
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_EODWF_lat";
CPparams.rng_prefix = "ckpoint_EODWF_rng";
@ -82,7 +81,7 @@ int main(int argc, char **argv) {
TheHMC.Resources.SetRNGSeeds(RNGpar);
// Construct observables
// here there is too much indirection
// here there is too much indirection
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
TheHMC.Resources.AddObservable<PlaqObs>();
//////////////////////////////////////////////
@ -93,11 +92,11 @@ int main(int argc, char **argv) {
Real strange_mass = 0.04;
Real pv_mass = 1.0;
RealD M5 = 1.8;
RealD b = 1.0;
RealD b = 1.0;
RealD c = 0.0;
// FIXME:
// Same in MC and MD
// Same in MC and MD
// Need to mix precision too
OneFlavourRationalParams OFRp;
OFRp.lo = 4.0e-3;
@ -122,7 +121,7 @@ int main(int argc, char **argv) {
// These lines are unecessary if BC are all periodic
std::vector<Complex> boundary = {1,1,1,-1};
FermionAction::ImplParams Params(boundary);
double StoppingCondition = 1e-10;
double MaxCGIterations = 30000;
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);

View File

@ -108,7 +108,7 @@ public:
void saveBlock(const A2AMatrixSet<T> &m, const unsigned int ext, const unsigned int str,
const unsigned int i, const unsigned int j);
template <template <class> class Vec, typename VecT>
void load(Vec<VecT> &v, double *tRead = nullptr);
void load(Vec<VecT> &v, double *tRead = nullptr, GridBase *grid = nullptr);
private:
std::string filename_{""}, dataname_{""};
unsigned int nt_{0}, ni_{0}, nj_{0};
@ -506,44 +506,53 @@ void A2AMatrixIo<T>::saveBlock(const A2AMatrixSet<T> &m,
template <typename T>
template <template <class> class Vec, typename VecT>
void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead)
void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead, GridBase *grid)
{
#ifdef HAVE_HDF5
Hdf5Reader reader(filename_);
std::vector<hsize_t> hdim;
H5NS::DataSet dataset;
H5NS::DataSpace dataspace;
H5NS::CompType datatype;
push(reader, dataname_);
auto &group = reader.getGroup();
dataset = group.openDataSet(HADRONS_A2AM_NAME);
datatype = dataset.getCompType();
dataspace = dataset.getSpace();
hdim.resize(dataspace.getSimpleExtentNdims());
dataspace.getSimpleExtentDims(hdim.data());
if ((nt_*ni_*nj_ != 0) and
((hdim[0] != nt_) or (hdim[1] != ni_) or (hdim[2] != nj_)))
if (!(grid) || grid->IsBoss())
{
HADRONS_ERROR(Size, "all-to-all matrix size mismatch (got "
+ std::to_string(hdim[0]) + "x" + std::to_string(hdim[1]) + "x"
+ std::to_string(hdim[2]) + ", expected "
+ std::to_string(nt_) + "x" + std::to_string(ni_) + "x"
+ std::to_string(nj_));
}
else if (ni_*nj_ == 0)
{
if (hdim[0] != nt_)
Hdf5Reader reader(filename_);
push(reader, dataname_);
auto &group = reader.getGroup();
dataset = group.openDataSet(HADRONS_A2AM_NAME);
datatype = dataset.getCompType();
dataspace = dataset.getSpace();
hdim.resize(dataspace.getSimpleExtentNdims());
dataspace.getSimpleExtentDims(hdim.data());
if ((nt_ * ni_ * nj_ != 0) and
((hdim[0] != nt_) or (hdim[1] != ni_) or (hdim[2] != nj_)))
{
HADRONS_ERROR(Size, "all-to-all time size mismatch (got "
+ std::to_string(hdim[0]) + ", expected "
+ std::to_string(nt_) + ")");
HADRONS_ERROR(Size, "all-to-all matrix size mismatch (got "
+ std::to_string(hdim[0]) + "x" + std::to_string(hdim[1]) + "x"
+ std::to_string(hdim[2]) + ", expected "
+ std::to_string(nt_) + "x" + std::to_string(ni_) + "x"
+ std::to_string(nj_));
}
ni_ = hdim[1];
nj_ = hdim[2];
else if (ni_*nj_ == 0)
{
if (hdim[0] != nt_)
{
HADRONS_ERROR(Size, "all-to-all time size mismatch (got "
+ std::to_string(hdim[0]) + ", expected "
+ std::to_string(nt_) + ")");
}
ni_ = hdim[1];
nj_ = hdim[2];
}
}
if (grid)
{
grid->Broadcast(grid->BossRank(), &ni_, sizeof(unsigned int));
grid->Broadcast(grid->BossRank(), &nj_, sizeof(unsigned int));
}
A2AMatrix<T> buf(ni_, nj_);
int broadcastSize = sizeof(T) * buf.size();
std::vector<hsize_t> count = {1, static_cast<hsize_t>(ni_),
static_cast<hsize_t>(nj_)},
stride = {1, 1, 1},
@ -565,10 +574,20 @@ void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead)
std::cout << " " << t;
std::cout.flush();
}
dataspace.selectHyperslab(H5S_SELECT_SET, count.data(), offset.data(),
stride.data(), block.data());
if (tRead) *tRead -= usecond();
dataset.read(buf.data(), datatype, memspace, dataspace);
if (!(grid) || grid->IsBoss())
{
dataspace.selectHyperslab(H5S_SELECT_SET, count.data(), offset.data(),
stride.data(), block.data());
}
if (tRead) *tRead -= usecond();
if (!(grid) || grid->IsBoss())
{
dataset.read(buf.data(), datatype, memspace, dataspace);
}
if (grid)
{
grid->Broadcast(grid->BossRank(), buf.data(), broadcastSize);
}
if (tRead) *tRead += usecond();
v[t] = buf.template cast<VecT>();
}

View File

@ -52,6 +52,7 @@ public:
const std::vector<FermionField> & getNoise(void) const;
const FermionField & operator[](const unsigned int i) const;
FermionField & operator[](const unsigned int i);
void normalise(Real norm);
void resize(const unsigned int nNoise);
unsigned int size(void) const;
GridCartesian *getGrid(void) const;
@ -93,6 +94,21 @@ private:
unsigned int nSrc_;
};
// Diluted noise set whose vectors are generated by
// SparseSpinColorDiagonalNoise::generateNoise(). The constructor sizes the
// underlying DilutedNoise storage to n_src*Ns*FImpl::Dimension vectors and
// records the two counts below.
template <typename FImpl>
class SparseSpinColorDiagonalNoise: public DilutedNoise<FImpl>
{
public:
typedef typename FImpl::FermionField FermionField;
public:
// constructor/destructor
// g: grid the noise fields live on; n_src: number of noise draws;
// n_sparse: modulus used by generateNoise() to mask lattice sites
SparseSpinColorDiagonalNoise(GridCartesian *g, unsigned int n_src, unsigned int n_sparse);
virtual ~SparseSpinColorDiagonalNoise(void) = default;
// generate noise
// fills every stored vector using random numbers drawn from rng
virtual void generateNoise(GridParallelRNG &rng);
private:
// number of noise draws requested at construction
unsigned int nSrc_;
// sparsening modulus requested at construction
unsigned int nSparse_;
};
/******************************************************************************
* DilutedNoise template implementation *
@ -138,6 +154,15 @@ DilutedNoise<FImpl>::operator[](const unsigned int i)
return noise_[i];
}
// Multiply every stored noise vector by the scalar `norm`, in place.
// Iterating over the container directly avoids comparing a signed index
// with the unsigned value returned by noise_.size().
template <typename FImpl>
void DilutedNoise<FImpl>::normalise(Real norm)
{
    for (auto &field : noise_)
    {
        field = norm*field;
    }
}
template <typename FImpl>
void DilutedNoise<FImpl>::resize(const unsigned int nNoise)
{
@ -245,6 +270,87 @@ void FullVolumeSpinColorDiagonalNoise<FImpl>::generateNoise(GridParallelRNG &rng
}
}
/******************************************************************************
* SparseSpinColorDiagonalNoise template implementation *
******************************************************************************/
// Construct the noise set on grid g. The base class is asked for
// nSrc*Ns*FImpl::Dimension vectors (one per draw, per spin index, per colour
// index as filled by generateNoise()); nSrc and nSparse are stored unchanged.
// NOTE(review): no validation is done here; generateNoise() divides by
// nSparse, so nSparse == 0 is not a usable value.
template <typename FImpl>
SparseSpinColorDiagonalNoise<FImpl>::
SparseSpinColorDiagonalNoise(GridCartesian *g, unsigned int nSrc, unsigned int nSparse)
: DilutedNoise<FImpl>(g, nSrc*Ns*FImpl::Dimension), nSrc_(nSrc), nSparse_(nSparse)
{}
// Fill all nSrc_*Ns*nc noise vectors.
//
// For each of the nSrc_ draws a complex scalar field `eta` is generated from
// `rng`; when nSparse_ != 1 it is zeroed on every site where
// (sum of coordinates + j) mod nSparse_ is non-zero. The (possibly masked)
// field is then poked into each spin index s and colour index c in turn,
// producing one vector per (draw, s, c). The offset j is advanced every
// nSrc_/nSparse_ draws, and at the end all vectors are scaled by
// sqrt(1/(nSrc_/nSparse_)).
//
// Preconditions (asserted): nSparse_ > 0, and nSrc_ is either 0 or at least
// nSparse_ -- otherwise the draws-per-offset count is zero and the modulo
// used to advance j would divide by zero.
template <typename FImpl>
void SparseSpinColorDiagonalNoise<FImpl>::generateNoise(GridParallelRNG &rng)
{
    typedef decltype(peekColour((*this)[0], 0)) SpinField;

    auto           &noise = *this;
    auto           g      = this->getGrid();
    auto           nd     = g->GlobalDimensions().size();
    auto           nc     = FImpl::Dimension;
    LatticeInteger coor(g), coorTot(g);
    Complex        shift(1., 1.);
    LatticeComplex eta(g);
    SpinField      etas(g);
    unsigned int   i = 0;   // running index of the vector being filled
    unsigned int   j = 0;   // current offset of the sparse pattern

    assert(nSparse_ > 0);
    // number of draws sharing the same offset j (integer division truncates,
    // which is what the original two-branch computation produced)
    const unsigned int nSrc_ec = nSrc_/nSparse_;
    assert((nSrc_ == 0) || (nSrc_ec > 0));

    for (unsigned int n = 0; n < nSrc_; ++n)
    {
        bernoulli(rng, eta);
        // presumably maps the 0/1 draws to (+-1 +- i)/sqrt(2) -- TODO confirm
        // against the definition of bernoulli()
        eta = (2.*eta - shift)*(1./::sqrt(2.));

        if (nSparse_ != 1)
        {
            assert(g->GlobalDimensions()[1]%nSparse_ == 0);
            // # 0 # 0
            // 0 # 0 #
            // # 0 # 0
            // 0 # 0 #
            coorTot = 0;
            for (unsigned int d = 0; d < nd; ++d)
            {
                LatticeCoordinate(coor, d);
                coorTot = coorTot + coor;
            }
            coorTot = coorTot + j;
            // keep eta only where the shifted coordinate sum is a multiple
            // of nSparse_
            eta = where(mod(coorTot,nSparse_), 0.*eta, eta);
        }

        for (unsigned int s = 0; s < Ns; ++s)
        {
            etas = Zero();
            pokeSpin(etas, eta, s);
            for (unsigned int c = 0; c < nc; ++c)
            {
                noise[i] = Zero();
                pokeColour(noise[i], etas, c);
                i++;
            }
        }

        // move to the next offset once nSrc_ec draws have used this one
        if ((n + 1)%nSrc_ec == 0)
        {
            j++;
        }
    }

    Real norm = sqrt(1./nSrc_ec);
    this->normalise(norm);
}
END_HADRONS_NAMESPACE
#endif // Hadrons_DilutedNoise_hpp_

View File

@ -87,13 +87,20 @@ public:
};
public:
DiskVectorBase(const std::string dirname, const unsigned int size = 0,
const unsigned int cacheSize = 1, const bool clean = true);
const unsigned int cacheSize = 1, const bool clean = true,
GridBase *grid = nullptr);
DiskVectorBase(DiskVectorBase<T> &&v) = default;
virtual ~DiskVectorBase(void);
const T & operator[](const unsigned int i) const;
RwAccessHelper operator[](const unsigned int i);
double hitRatio(void) const;
void resetStat(void);
void setSize(unsigned int size_);
unsigned int getSize() const;
unsigned int dvSize;
void setGrid(GridBase *grid_);
GridBase *getGrid() const;
GridBase *dvGrid;
private:
virtual void load(T &obj, const std::string filename) const = 0;
virtual void save(const std::string filename, const T &obj) const = 0;
@ -107,6 +114,7 @@ private:
unsigned int size_, cacheSize_;
double access_{0.}, hit_{0.};
bool clean_;
GridBase *grid_;
// using pointers to allow modifications when class is const
// semantic: const means data unmodified, but cache modification allowed
std::unique_ptr<std::vector<T>> cachePtr_;
@ -158,66 +166,92 @@ public:
{
return (*this)[i](j, k);
}
// Return {number of matrices, rows of element 0, columns of element 0}.
// Element 0 is fetched once for the row count and once for the column
// count, in that order.
std::vector<int> dimensions() const
{
    const int nMat = static_cast<int>(this->getSize());
    const int nRow = static_cast<int>((*this)[0].rows());
    const int nCol = static_cast<int>((*this)[0].cols());

    return {nMat, nRow, nCol};
}
private:
virtual void load(EigenDiskVectorMat<T> &obj, const std::string filename) const
{
std::ifstream f(filename, std::ios::binary);
uint32_t crc, check;
Eigen::Index nRow, nCol;
size_t matSize;
double tRead, tHash;
f.read(reinterpret_cast<char *>(&crc), sizeof(crc));
f.read(reinterpret_cast<char *>(&nRow), sizeof(nRow));
f.read(reinterpret_cast<char *>(&nCol), sizeof(nCol));
obj.resize(nRow, nCol);
matSize = nRow*nCol*sizeof(T);
tRead = -usecond();
f.read(reinterpret_cast<char *>(obj.data()), matSize);
tRead += usecond();
tHash = -usecond();
#ifdef USE_IPP
check = GridChecksum::crc32c(obj.data(), matSize);
#else
check = GridChecksum::crc32(obj.data(), matSize);
#endif
tHash += usecond();
DV_DEBUG_MSG(this, "Eigen read " << tRead/1.0e6 << " sec " << matSize/tRead*1.0e6/1024/1024 << " MB/s");
DV_DEBUG_MSG(this, "Eigen crc32 " << std::hex << check << std::dec
<< " " << tHash/1.0e6 << " sec " << matSize/tHash*1.0e6/1024/1024 << " MB/s");
if (crc != check)
GridBase *loadGrid;
loadGrid = (*this).getGrid();
if (!(loadGrid) || loadGrid->IsBoss())
{
HADRONS_ERROR(Io, "checksum failed")
std::ifstream f(filename, std::ios::binary);
uint32_t crc, check;
Eigen::Index nRow, nCol;
size_t matSize;
double tRead, tHash;
f.read(reinterpret_cast<char *>(&crc), sizeof(crc));
f.read(reinterpret_cast<char *>(&nRow), sizeof(nRow));
f.read(reinterpret_cast<char *>(&nCol), sizeof(nCol));
obj.resize(nRow, nCol);
matSize = nRow*nCol*sizeof(T);
tRead = -usecond();
f.read(reinterpret_cast<char *>(obj.data()), matSize);
tRead += usecond();
tHash = -usecond();
#ifdef USE_IPP
check = GridChecksum::crc32c(obj.data(), matSize);
#else
check = GridChecksum::crc32(obj.data(), matSize);
#endif
tHash += usecond();
DV_DEBUG_MSG(this, "Eigen read " << tRead/1.0e6 << " sec " << matSize/tRead*1.0e6/1024/1024 << " MB/s");
DV_DEBUG_MSG(this, "Eigen crc32 " << std::hex << check << std::dec
<< " " << tHash/1.0e6 << " sec " << matSize/tHash*1.0e6/1024/1024 << " MB/s");
if (crc != check)
{
HADRONS_ERROR(Io, "checksum failed")
}
}
int broadcastSize;
broadcastSize = sizeof(T)*obj.size();
if (loadGrid)
{
loadGrid->Broadcast(loadGrid->BossRank(), obj.data(), broadcastSize);
loadGrid->Barrier();
}
}
virtual void save(const std::string filename, const EigenDiskVectorMat<T> &obj) const
{
std::ofstream f(filename, std::ios::binary);
uint32_t crc;
Eigen::Index nRow, nCol;
size_t matSize;
double tWrite, tHash;
nRow = obj.rows();
nCol = obj.cols();
matSize = nRow*nCol*sizeof(T);
tHash = -usecond();
#ifdef USE_IPP
crc = GridChecksum::crc32c(obj.data(), matSize);
#else
crc = GridChecksum::crc32(obj.data(), matSize);
#endif
tHash += usecond();
f.write(reinterpret_cast<char *>(&crc), sizeof(crc));
f.write(reinterpret_cast<char *>(&nRow), sizeof(nRow));
f.write(reinterpret_cast<char *>(&nCol), sizeof(nCol));
tWrite = -usecond();
f.write(reinterpret_cast<const char *>(obj.data()), matSize);
tWrite += usecond();
DV_DEBUG_MSG(this, "Eigen write " << tWrite/1.0e6 << " sec " << matSize/tWrite*1.0e6/1024/1024 << " MB/s");
DV_DEBUG_MSG(this, "Eigen crc32 " << std::hex << crc << std::dec
<< " " << tHash/1.0e6 << " sec " << matSize/tHash*1.0e6/1024/1024 << " MB/s");
GridBase *saveGrid;
saveGrid = (*this).getGrid();
if (!(saveGrid) || saveGrid->IsBoss())
{
std::ofstream f(filename, std::ios::binary);
uint32_t crc;
Eigen::Index nRow, nCol;
size_t matSize;
double tWrite, tHash;
nRow = obj.rows();
nCol = obj.cols();
matSize = nRow*nCol*sizeof(T);
tHash = -usecond();
#ifdef USE_IPP
crc = GridChecksum::crc32c(obj.data(), matSize);
#else
crc = GridChecksum::crc32(obj.data(), matSize);
#endif
tHash += usecond();
f.write(reinterpret_cast<char *>(&crc), sizeof(crc));
f.write(reinterpret_cast<char *>(&nRow), sizeof(nRow));
f.write(reinterpret_cast<char *>(&nCol), sizeof(nCol));
tWrite = -usecond();
f.write(reinterpret_cast<const char *>(obj.data()), matSize);
tWrite += usecond();
DV_DEBUG_MSG(this, "Eigen write " << tWrite/1.0e6 << " sec " << matSize/tWrite*1.0e6/1024/1024 << " MB/s");
DV_DEBUG_MSG(this, "Eigen crc32 " << std::hex << crc << std::dec
<< " " << tHash/1.0e6 << " sec " << matSize/tHash*1.0e6/1024/1024 << " MB/s");
}
if (saveGrid) saveGrid->Barrier();
}
};
@ -228,8 +262,9 @@ template <typename T>
DiskVectorBase<T>::DiskVectorBase(const std::string dirname,
const unsigned int size,
const unsigned int cacheSize,
const bool clean)
: dirname_(dirname), size_(size), cacheSize_(cacheSize), clean_(clean)
const bool clean,
GridBase *grid)
: dirname_(dirname), size_(size), cacheSize_(cacheSize), clean_(clean), grid_(grid)
, cachePtr_(new std::vector<T>(size))
, modifiedPtr_(new std::vector<bool>(size, false))
, indexPtr_(new std::map<unsigned int, unsigned int>())
@ -238,15 +273,21 @@ DiskVectorBase<T>::DiskVectorBase(const std::string dirname,
{
struct stat s;
if(stat(dirname.c_str(), &s) == 0)
if (!(grid_) || grid_->IsBoss())
{
HADRONS_ERROR(Io, "directory '" + dirname + "' already exists")
if(stat(dirname.c_str(), &s) == 0)
{
HADRONS_ERROR(Io, "directory '" + dirname + "' already exists")
}
mkdir(dirname);
}
mkdir(dirname);
if (grid_) grid_->Barrier();
for (unsigned int i = 0; i < cacheSize_; ++i)
{
freePtr_->push(i);
}
setSize(size_);
setGrid(grid_);
}
template <typename T>
@ -258,6 +299,30 @@ DiskVectorBase<T>::~DiskVectorBase(void)
}
}
// Store the given size in the public member dvSize.
// NOTE(review): the parameter is spelled size_, the same name the class uses
// for a private member; inside this function the parameter hides that member,
// so only dvSize is written here.
template <typename T>
void DiskVectorBase<T>::setSize(unsigned int size_)
{
dvSize = size_;
}
// Return the value last stored in dvSize by setSize().
template <typename T>
unsigned int DiskVectorBase<T>::getSize() const
{
return dvSize;
}
// Store the given grid pointer in the public member dvGrid.
// NOTE(review): the parameter is spelled grid_, the same name the class uses
// for a private member; inside this function the parameter hides that member,
// so only dvGrid is written here.
template <typename T>
void DiskVectorBase<T>::setGrid(GridBase *grid_)
{
dvGrid = grid_;
}
// Return the grid pointer last stored in dvGrid by setGrid(); this may be
// nullptr when no grid was supplied.
template <typename T>
GridBase *DiskVectorBase<T>::getGrid() const
{
return dvGrid;
}
template <typename T>
const T & DiskVectorBase<T>::operator[](const unsigned int i) const
{
@ -299,7 +364,7 @@ const T & DiskVectorBase<T>::operator[](const unsigned int i) const
}
DV_DEBUG_MSG(this, "in cache: " << msg);
#endif
if (grid_) grid_->Barrier();
return cache[index.at(i)];
}
@ -358,6 +423,7 @@ void DiskVectorBase<T>::evict(void) const
index.erase(i);
loads.pop_front();
}
if (grid_) grid_->Barrier();
}
template <typename T>
@ -395,27 +461,14 @@ void DiskVectorBase<T>::cacheInsert(const unsigned int i, const T &obj) const
auto &freeInd = *freePtr_;
auto &loads = *loadsPtr_;
// cache miss, evict and store
if (index.find(i) == index.end())
{
evict();
index[i] = freeInd.top();
freeInd.pop();
cache[index.at(i)] = obj;
loads.push_back(i);
modified[index.at(i)] = false;
}
// cache hit, modify current value
else
{
auto pos = std::find(loads.begin(), loads.end(), i);
cache[index.at(i)] = obj;
modified[index.at(i)] = true;
loads.erase(pos);
loads.push_back(i);
}
evict();
index[i] = freeInd.top();
freeInd.pop();
cache[index.at(i)] = obj;
loads.push_back(i);
modified[index.at(i)] = false;
if (grid_) grid_->Barrier();
#ifdef DV_DEBUG
std::string msg;
@ -434,21 +487,23 @@ void DiskVectorBase<T>::cacheInsert(const unsigned int i, const T &obj) const
template <typename T>
void DiskVectorBase<T>::clean(void)
{
auto unlink = [](const char *fpath, const struct stat *sb,
int typeflag, struct FTW *ftwbuf)
if (!(grid_) || grid_->IsBoss())
{
int rv = remove(fpath);
auto unlink = [](const char *fpath, const struct stat *sb,
int typeflag, struct FTW *ftwbuf) {
int rv = remove(fpath);
if (rv)
{
HADRONS_ERROR(Io, "cannot remove '" + std::string(fpath) + "': "
+ std::string(std::strerror(errno)));
}
if (rv)
{
HADRONS_ERROR(Io, "cannot remove '" + std::string(fpath) + "': " + std::string(std::strerror(errno)));
}
return rv;
};
return rv;
};
nftw(dirname_.c_str(), unlink, 64, FTW_DEPTH | FTW_PHYS);
nftw(dirname_.c_str(), unlink, 64, FTW_DEPTH | FTW_PHYS);
}
if (grid_) grid_->Barrier();
}
END_HADRONS_NAMESPACE

View File

@ -84,6 +84,16 @@ GridParallelRNG * Environment::get4dRng(void)
return rng4d_.get();
}
// Return the environment's serial RNG, allocating it on the first request.
GridSerialRNG * Environment::getSerialRng(void)
{
    GridSerialRNG *rng = rngSerial_.get();

    if (rng == nullptr)
    {
        rngSerial_.reset(new GridSerialRNG());
        rng = rngSerial_.get();
    }

    return rng;
}
// general memory management ///////////////////////////////////////////////////
void Environment::addObject(const std::string name, const int moduleAddress)
{

View File

@ -74,6 +74,7 @@ public:
typedef std::unique_ptr<GridCartesian> GridPt;
typedef std::unique_ptr<GridRedBlackCartesian> GridRbPt;
typedef std::unique_ptr<GridParallelRNG> RngPt;
typedef std::unique_ptr<GridSerialRNG> SerialRngPt;
enum class Storage {object, cache, temporary};
private:
struct ObjInfo
@ -114,6 +115,7 @@ public:
double getVolume(void) const;
// random number generator
GridParallelRNG * get4dRng(void);
GridSerialRNG * getSerialRng(void);
// general memory management
void addObject(const std::string name,
const int moduleAddress = -1);
@ -183,6 +185,7 @@ private:
unsigned int nd_;
// random number generator
RngPt rng4d_{nullptr};
SerialRngPt rngSerial_{nullptr};
// object store
std::vector<ObjInfo> object_;
std::map<std::string, unsigned int> objectAddress_;

View File

@ -272,7 +272,7 @@ struct Correlator: Serializable
{
GRID_SERIALIZABLE_CLASS_MEMBERS(ARG(Correlator<Metadata, Scalar>),
Metadata, info,
std::vector<Complex>, corr);
std::vector<Scalar>, corr);
};
END_HADRONS_NAMESPACE

View File

@ -93,3 +93,18 @@ GridParallelRNG & ModuleBase::rng4d(void)
return r;
}
// Return the environment's serial RNG, re-seeding it from the module seed
// string whenever that string differs from the one remembered in seed_.
// NOTE(review): if another accessor tracks its seeding through the same
// seed_ member, whichever is called first will suppress seeding in the
// other -- confirm.
GridSerialRNG & ModuleBase::rngSerial(void)
{
    auto &serialRng = *env().getSerialRng();

    // seed already up to date: hand the generator back untouched
    if (makeSeedString() == seed_)
    {
        return serialRng;
    }

    seed_ = makeSeedString();
    LOG(Message) << "Seeding Serial RNG " << &serialRng << " with string '"
                 << seed_ << "'" << std::endl;
    serialRng.SeedUniqueString(seed_);

    return serialRng;
}

View File

@ -1,7 +1,6 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Module.hpp
Copyright (C) 2015-2019
@ -196,6 +195,7 @@ protected:
DEFINE_VM_ALIAS;
// RNG seeded from module string
GridParallelRNG &rng4d(void);
GridSerialRNG &rngSerial(void);
private:
std::string makeSeedString(void);
private:

View File

@ -1,71 +1,87 @@
#include <Hadrons/Modules/MSource/Gauss.hpp>
#include <Hadrons/Modules/MSource/Momentum.hpp>
#include <Hadrons/Modules/MSource/SeqAslash.hpp>
#include <Hadrons/Modules/MSource/Z2.hpp>
#include <Hadrons/Modules/MSource/Point.hpp>
#include <Hadrons/Modules/MSource/SeqGamma.hpp>
#include <Hadrons/Modules/MSource/Convolution.hpp>
#include <Hadrons/Modules/MSource/Wall.hpp>
#include <Hadrons/Modules/MSource/SeqConserved.hpp>
#include <Hadrons/Modules/MScalarSUN/Div.hpp>
#include <Hadrons/Modules/MScalarSUN/TrKinetic.hpp>
#include <Hadrons/Modules/MScalarSUN/TrPhi.hpp>
#include <Hadrons/Modules/MScalarSUN/TwoPoint.hpp>
#include <Hadrons/Modules/MScalarSUN/Grad.hpp>
#include <Hadrons/Modules/MScalarSUN/Utils.hpp>
#include <Hadrons/Modules/MScalarSUN/StochFreeField.hpp>
#include <Hadrons/Modules/MScalarSUN/EMT.hpp>
#include <Hadrons/Modules/MScalarSUN/TrMag.hpp>
#include <Hadrons/Modules/MScalarSUN/TwoPointNPR.hpp>
#include <Hadrons/Modules/MScalarSUN/TransProj.hpp>
#include <Hadrons/Modules/MNoise/TimeDilutedSpinColorDiagonal.hpp>
#include <Hadrons/Modules/MNoise/FullVolumeSpinColorDiagonal.hpp>
#include <Hadrons/Modules/MScalar/FreeProp.hpp>
#include <Hadrons/Modules/MScalar/Scalar.hpp>
#include <Hadrons/Modules/MScalar/ChargedProp.hpp>
#include <Hadrons/Modules/MAction/Wilson.hpp>
#include <Hadrons/Modules/MAction/ScaledDWF.hpp>
#include <Hadrons/Modules/MAction/MobiusDWF.hpp>
#include <Hadrons/Modules/MAction/WilsonClover.hpp>
#include <Hadrons/Modules/MAction/ZMobiusDWF.hpp>
#include <Hadrons/Modules/MAction/DWF.hpp>
#include <Hadrons/Modules/MGauge/UnitEm.hpp>
#include <Hadrons/Modules/MGauge/Electrify.hpp>
#include <Hadrons/Modules/MGauge/StoutSmearing.hpp>
#include <Hadrons/Modules/MGauge/Random.hpp>
#include <Hadrons/Modules/MGauge/FundtoHirep.hpp>
#include <Hadrons/Modules/MGauge/GaugeFix.hpp>
#include <Hadrons/Modules/MGauge/Unit.hpp>
#include <Hadrons/Modules/MGauge/StochEm.hpp>
#include <Hadrons/Modules/MUtilities/RandomVectors.hpp>
#include <Hadrons/Modules/MUtilities/PrecisionCast.hpp>
#include <Hadrons/Modules/MIO/LoadCosmHol.hpp>
#include <Hadrons/Modules/MIO/LoadA2AVectors.hpp>
#include <Hadrons/Modules/MIO/LoadEigenPack.hpp>
#include <Hadrons/Modules/MIO/LoadNersc.hpp>
#include <Hadrons/Modules/MIO/LoadBinary.hpp>
#include <Hadrons/Modules/MIO/LoadCoarseEigenPack.hpp>
#include <Hadrons/Modules/MAction/MobiusDWF.hpp>
#include <Hadrons/Modules/MAction/ScaledDWF.hpp>
#include <Hadrons/Modules/MAction/WilsonClover.hpp>
#include <Hadrons/Modules/MAction/Wilson.hpp>
#include <Hadrons/Modules/MAction/ZMobiusDWF.hpp>
#include <Hadrons/Modules/MContraction/A2AAslashField.hpp>
#include <Hadrons/Modules/MContraction/A2AFourQuarkContraction.hpp>
#include <Hadrons/Modules/MContraction/A2ALoop.hpp>
#include <Hadrons/Modules/MContraction/A2AMesonField.hpp>
#include <Hadrons/Modules/MContraction/Baryon.hpp>
#include <Hadrons/Modules/MContraction/DiscLoop.hpp>
#include <Hadrons/Modules/MContraction/Gamma3pt.hpp>
#include <Hadrons/Modules/MContraction/Meson.hpp>
#include <Hadrons/Modules/MContraction/SigmaToNucleonEye.hpp>
#include <Hadrons/Modules/MContraction/SigmaToNucleonNonEye.hpp>
#include <Hadrons/Modules/MContraction/WeakEye3pt.hpp>
#include <Hadrons/Modules/MContraction/WeakMesonDecayKl2.hpp>
#include <Hadrons/Modules/MContraction/Gamma3pt.hpp>
#include <Hadrons/Modules/MContraction/A2AMesonField.hpp>
#include <Hadrons/Modules/MContraction/A2ALoop.hpp>
#include <Hadrons/Modules/MContraction/WeakNonEye3pt.hpp>
#include <Hadrons/Modules/MContraction/DiscLoop.hpp>
#include <Hadrons/Modules/MContraction/A2AAslashField.hpp>
#include <Hadrons/Modules/MContraction/Baryon.hpp>
#include <Hadrons/Modules/MContraction/Meson.hpp>
#include <Hadrons/Modules/MNPR/FourQuark.hpp>
#include <Hadrons/Modules/MNPR/Bilinear.hpp>
#include <Hadrons/Modules/MNPR/Amputate.hpp>
#include <Hadrons/Modules/MSolver/A2AAslashVectors.hpp>
#include <Hadrons/Modules/MSolver/RBPrecCG.hpp>
#include <Hadrons/Modules/MSolver/Guesser.hpp>
#include <Hadrons/Modules/MSolver/LocalCoherenceLanczos.hpp>
#include <Hadrons/Modules/MSolver/A2AVectors.hpp>
#include <Hadrons/Modules/MSolver/MixedPrecisionRBPrecCG.hpp>
#include <Hadrons/Modules/MDistil/Distil.hpp>
#include <Hadrons/Modules/MDistil/DistilPar.hpp>
#include <Hadrons/Modules/MDistil/DistilVectors.hpp>
#include <Hadrons/Modules/MDistil/LapEvec.hpp>
#include <Hadrons/Modules/MDistil/Noises.hpp>
#include <Hadrons/Modules/MDistil/PerambFromSolve.hpp>
#include <Hadrons/Modules/MDistil/Perambulator.hpp>
#include <Hadrons/Modules/MFermion/EMLepton.hpp>
#include <Hadrons/Modules/MFermion/FreeProp.hpp>
#include <Hadrons/Modules/MFermion/GaugeProp.hpp>
#include <Hadrons/Modules/MFermion/EMLepton.hpp>
#include <Hadrons/Modules/MSink/Smear.hpp>
#include <Hadrons/Modules/MGauge/Electrify.hpp>
#include <Hadrons/Modules/MGauge/FundtoHirep.hpp>
#include <Hadrons/Modules/MGauge/GaugeFix.hpp>
#include <Hadrons/Modules/MGauge/Random.hpp>
#include <Hadrons/Modules/MGauge/StochEm.hpp>
#include <Hadrons/Modules/MGauge/StoutSmearing.hpp>
#include <Hadrons/Modules/MGauge/UnitEm.hpp>
#include <Hadrons/Modules/MGauge/Unit.hpp>
#include <Hadrons/Modules/MIO/LoadA2AMatrixDiskVector.hpp>
#include <Hadrons/Modules/MIO/LoadA2AVectors.hpp>
#include <Hadrons/Modules/MIO/LoadBinary.hpp>
#include <Hadrons/Modules/MIO/LoadCoarseEigenPack.hpp>
#include <Hadrons/Modules/MIO/LoadCosmHol.hpp>
#include <Hadrons/Modules/MIO/LoadDistilNoise.hpp>
#include <Hadrons/Modules/MIO/LoadEigenPack.hpp>
#include <Hadrons/Modules/MIO/LoadNersc.hpp>
#include <Hadrons/Modules/MIO/LoadPerambulator.hpp>
#include <Hadrons/Modules/MNoise/FullVolumeSpinColorDiagonal.hpp>
#include <Hadrons/Modules/MNoise/SparseSpinColorDiagonal.hpp>
#include <Hadrons/Modules/MNoise/TimeDilutedSpinColorDiagonal.hpp>
#include <Hadrons/Modules/MNPR/Amputate.hpp>
#include <Hadrons/Modules/MNPR/Bilinear.hpp>
#include <Hadrons/Modules/MNPR/FourQuark.hpp>
#include <Hadrons/Modules/MScalar/ChargedProp.hpp>
#include <Hadrons/Modules/MScalar/FreeProp.hpp>
#include <Hadrons/Modules/MScalar/Scalar.hpp>
#include <Hadrons/Modules/MScalarSUN/Div.hpp>
#include <Hadrons/Modules/MScalarSUN/EMT.hpp>
#include <Hadrons/Modules/MScalarSUN/Grad.hpp>
#include <Hadrons/Modules/MScalarSUN/StochFreeField.hpp>
#include <Hadrons/Modules/MScalarSUN/TransProj.hpp>
#include <Hadrons/Modules/MScalarSUN/TrKinetic.hpp>
#include <Hadrons/Modules/MScalarSUN/TrMag.hpp>
#include <Hadrons/Modules/MScalarSUN/TrPhi.hpp>
#include <Hadrons/Modules/MScalarSUN/TwoPoint.hpp>
#include <Hadrons/Modules/MScalarSUN/TwoPointNPR.hpp>
#include <Hadrons/Modules/MScalarSUN/Utils.hpp>
#include <Hadrons/Modules/MSink/Point.hpp>
#include <Hadrons/Modules/MSink/Smear.hpp>
#include <Hadrons/Modules/MSolver/A2AAslashVectors.hpp>
#include <Hadrons/Modules/MSolver/A2AVectors.hpp>
#include <Hadrons/Modules/MSolver/Guesser.hpp>
#include <Hadrons/Modules/MSolver/LocalCoherenceLanczos.hpp>
#include <Hadrons/Modules/MSolver/MixedPrecisionRBPrecCG.hpp>
#include <Hadrons/Modules/MSolver/RBPrecCG.hpp>
#include <Hadrons/Modules/MSource/Convolution.hpp>
#include <Hadrons/Modules/MSource/Gauss.hpp>
#include <Hadrons/Modules/MSource/JacobiSmear.hpp>
#include <Hadrons/Modules/MSource/Momentum.hpp>
#include <Hadrons/Modules/MSource/MomentumPhase.hpp>
#include <Hadrons/Modules/MSource/Point.hpp>
#include <Hadrons/Modules/MSource/SeqAslash.hpp>
#include <Hadrons/Modules/MSource/SeqConserved.hpp>
#include <Hadrons/Modules/MSource/SeqGamma.hpp>
#include <Hadrons/Modules/MSource/Wall.hpp>
#include <Hadrons/Modules/MSource/Z2.hpp>
#include <Hadrons/Modules/MUtilities/PrecisionCast.hpp>
#include <Hadrons/Modules/MUtilities/RandomVectors.hpp>

View File

@ -174,6 +174,7 @@ void TA2AAslashField<FImpl, PhotonImpl>::setup(void)
template <typename FImpl, typename PhotonImpl>
void TA2AAslashField<FImpl, PhotonImpl>::execute(void)
{
#ifndef GRID_NVCC
auto &left = envGet(std::vector<FermionField>, par().left);
auto &right = envGet(std::vector<FermionField>, par().right);
@ -237,6 +238,7 @@ void TA2AAslashField<FImpl, PhotonImpl>::execute(void)
envGetTmp(Computation, computation);
computation.execute(left, right, kernel, ionameFn, filenameFn, metadataFn);
#endif
}
END_MODULE_NAMESPACE

View File

@ -0,0 +1,7 @@
#include <Hadrons/Modules/MContraction/A2AFourQuarkContraction.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MContraction;
// Explicitly instantiate the module template for the FIMPL implementation in
// this translation unit.
template class Grid::Hadrons::MContraction::TA2AFourQuarkContraction<FIMPL>;

View File

@ -0,0 +1,138 @@
#ifndef Hadrons_MContraction_A2AFourQuarkContraction_hpp_
#define Hadrons_MContraction_A2AFourQuarkContraction_hpp_
#include <Hadrons/Global.hpp>
#include <Hadrons/Module.hpp>
#include <Hadrons/ModuleFactory.hpp>
#include <Hadrons/DiskVector.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* A2AFourQuarkContraction *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
// Serializable parameter set of the A2AFourQuarkContraction module.
//   v1, v2   : names of the two vector sets read as std::vector<FermionField>
//   mf12     : name of the object read as EigenDiskVector<Complex>
//   allContr : true  -> one output field per time slice (shifted by dt),
//              false -> a single output field built from entry dt of mf12
//   dt       : time shift (allContr) or index into mf12 (otherwise)
class A2AFourQuarkContractionPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(A2AFourQuarkContractionPar,
std::string, v1,
std::string, v2,
std::string, mf12,
bool, allContr,
unsigned int, dt);
};
// Module that calls A2Autils<FImpl>::ContractWWVV on two vector sets and a
// disk-backed matrix set and stores the resulting propagator field(s) under
// the module's own name.
template <typename FImpl>
class TA2AFourQuarkContraction: public Module<A2AFourQuarkContractionPar>
{
public:
FERM_TYPE_ALIASES(FImpl, );
// constructor
TA2AFourQuarkContraction(const std::string name);
// destructor
virtual ~TA2AFourQuarkContraction(void) {};
// dependency relation
// inputs are the objects named by par().v1, par().v2 and par().mf12;
// the only output is named after the module
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
private:
// time extent; only assigned by setup() when par().allContr is true
unsigned int nt_;
};
MODULE_REGISTER_TMP(A2AFourQuarkContraction, TA2AFourQuarkContraction<FIMPL>, MContraction);
/******************************************************************************
* TA2AFourQuarkContraction implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
// Forward the module name to the Module base class; no other state is
// initialised here (nt_ is left unset until setup()).
template <typename FImpl>
TA2AFourQuarkContraction<FImpl>::TA2AFourQuarkContraction(const std::string name)
: Module<A2AFourQuarkContractionPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
// Names of the objects this module depends on: the two vector sets and the
// matrix set, in that order.
template <typename FImpl>
std::vector<std::string> TA2AFourQuarkContraction<FImpl>::getInput(void)
{
    return {par().v1, par().v2, par().mf12};
}
// The module produces a single object, stored under the module's own name.
template <typename FImpl>
std::vector<std::string> TA2AFourQuarkContraction<FImpl>::getOutput(void)
{
    return std::vector<std::string>(1, getName());
}
// setup ///////////////////////////////////////////////////////////////////////
// Register the temporary and the output object with the environment.
// With allContr set, nt_ is read from the time dimension and both the
// temporary and the output are vectors of nt_ propagator fields; otherwise
// the temporary is a one-element vector and the output a single field.
// Note that nt_ is not assigned in the second case.
template <typename FImpl>
void TA2AFourQuarkContraction<FImpl>::setup(void)
{
if (par().allContr)
{
nt_ = env().getDim(Tp);
envTmp(std::vector<PropagatorField>, "tmpWWVV", 1, nt_, envGetGrid(PropagatorField));
envCreate(std::vector<PropagatorField>, getName(), 1, nt_, envGetGrid(PropagatorField));
}
else
{
envTmp(std::vector<PropagatorField>, "tmpWWVV", 1, 1, envGetGrid(PropagatorField));
envCreate(PropagatorField, getName(), 1, envGetGrid(PropagatorField));
}
}
// execution ///////////////////////////////////////////////////////////////////
// Run the contraction and store the result under the module name.
//
// allContr == true : ContractWWVV is called on the whole matrix set, then
//                    output slot t receives temporary slot (t + dt) mod nt_.
// allContr == false: entry dt of the matrix set is copied into a row-major
//                    Eigen matrix, viewed as a 1 x ni x nj tensor, contracted,
//                    and the single resulting field is stored.
// NOTE(review): dt is used directly as an index into mf12 in the second
// case; no range check is performed here.
template <typename FImpl>
void TA2AFourQuarkContraction<FImpl>::execute(void)
{
    auto &v1   = envGet(std::vector<FermionField>, par().v1);
    auto &v2   = envGet(std::vector<FermionField>, par().v2);
    auto &mf12 = envGet(EigenDiskVector<Complex>, par().mf12);

    envGetTmp(std::vector<PropagatorField>, tmpWWVV);

    unsigned int dt = par().dt;

    if (par().allContr)
    {
        LOG(Message) << "Computing 4 quark contraction for " << getName()
                     << " for all t0 time translations "
                     << "with nt = " << nt_ << " and dt = " << dt << std::endl;

        auto &WWVV = envGet(std::vector<PropagatorField>, getName());

        A2Autils<FImpl>::ContractWWVV(tmpWWVV, mf12, &v1[0], &v2[0]);
        // cyclic shift of the time index by dt
        for (unsigned int t = 0; t < nt_; t++)
        {
            unsigned int t0 = (t + dt) % nt_;

            WWVV[t] = tmpWWVV[t0];
        }
    }
    else
    {
        LOG(Message) << "Computing 4 quark contraction for: " << getName()
                     << " for time dt = " << dt << std::endl;

        auto &WWVV = envGet(PropagatorField, getName());
        int  ni    = v1.size();
        int  nj    = v2.size();

        // local copy of the selected matrix, so the tensor map below refers
        // to storage that stays valid for the duration of the contraction
        Eigen::Matrix<Complex, -1, -1, Eigen::RowMajor> mf;
        mf = mf12[dt];
        Eigen::TensorMap<Eigen::Tensor<Complex, 3, Eigen::RowMajor>> mfT(mf.data(), 1, ni, nj);

        A2Autils<FImpl>::ContractWWVV(tmpWWVV, mfT, &v1[0], &v2[0]);
        WWVV = tmpWWVV[0];
    }
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MContraction_A2AFourQuarkContraction_hpp_

View File

@ -7,7 +7,7 @@ Source file: Hadrons/Modules/MContraction/Baryon.hpp
Copyright (C) 2015-2019
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
Author: Felix Erben <felix.erben@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -33,6 +33,7 @@ See the full license in the file "LICENSE" in the top level distribution directo
#include <Hadrons/Global.hpp>
#include <Hadrons/Module.hpp>
#include <Hadrons/ModuleFactory.hpp>
#include <Grid/qcd/utils/BaryonUtils.h>
BEGIN_HADRONS_NAMESPACE
@ -41,6 +42,9 @@ BEGIN_HADRONS_NAMESPACE
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
typedef std::pair<Gamma::Algebra, Gamma::Algebra> GammaAB;
typedef std::pair<GammaAB, GammaAB> GammaABPair;
class BaryonPar: Serializable
{
public:
@ -48,6 +52,11 @@ public:
std::string, q1,
std::string, q2,
std::string, q3,
std::string, gammas,
std::string, quarks,
std::string, prefactors,
std::string, parity,
std::string, sink,
std::string, output);
};
@ -58,12 +67,21 @@ public:
FERM_TYPE_ALIASES(FImpl1, 1);
FERM_TYPE_ALIASES(FImpl2, 2);
FERM_TYPE_ALIASES(FImpl3, 3);
class Result: Serializable
BASIC_TYPE_ALIASES(ScalarImplCR, Scalar);
SINK_TYPE_ALIASES(Scalar);
class Metadata: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
std::vector<std::vector<std::vector<Complex>>>, corr);
GRID_SERIALIZABLE_CLASS_MEMBERS(Metadata,
Gamma::Algebra, gammaA_left,
Gamma::Algebra, gammaB_left,
Gamma::Algebra, gammaA_right,
Gamma::Algebra, gammaB_right,
std::string, quarks,
std::string, prefactors,
int, parity);
};
typedef Correlator<Metadata> Result;
public:
// constructor
TBaryon(const std::string name);
@ -72,11 +90,14 @@ public:
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
virtual void parseGammaString(std::vector<GammaABPair> &gammaList);
protected:
// setup
virtual void setup(void);
// execution
virtual void execute(void);
// Which gamma algebra was specified
Gamma::Algebra al;
};
MODULE_REGISTER_TMP(Baryon, ARG(TBaryon<FIMPL, FIMPL, FIMPL>), MContraction);
@ -94,7 +115,7 @@ TBaryon<FImpl1, FImpl2, FImpl3>::TBaryon(const std::string name)
/*
 * Names of the objects this module depends on: the three quark propagators
 * and the sink object named in the parameters.
 */
template <typename FImpl1, typename FImpl2, typename FImpl3>
std::vector<std::string> TBaryon<FImpl1, FImpl2, FImpl3>::getInput(void)
{
    // Single declaration: the sink is read in execute(), so it is listed
    // together with the propagators.
    std::vector<std::string> input = {par().q1, par().q2, par().q3, par().sink};

    return input;
}
@ -107,30 +128,199 @@ std::vector<std::string> TBaryon<FImpl1, FImpl2, FImpl3>::getOutput(void)
return out;
}
/*
 * Parse par().gammas into a list of (left, right) gamma-structure pairs.
 *
 * After shorthand expansion the string must be a sequence of groups of the
 * form "((A B) (C D))", where A, B, C, D are alphanumeric gamma names;
 * gammaList receives one entry per group with
 *   first  = (A, B)
 *   second = (C, D)
 *
 * gammaList is cleared first. An input that does not match the expected
 * structure trips an assert.
 */
template <typename FImpl1, typename FImpl2,typename FImpl3>
void TBaryon<FImpl1, FImpl2,FImpl3>::parseGammaString(std::vector<GammaABPair> &gammaList)
{
    gammaList.clear();

    std::string gammaString = par().gammas;

    // Shorthands for less common baryon operators.
    // These MUST be expanded before "j12": "j12" is a prefix of both, so
    // expanding it first would turn "j12_alt1" into "(Identity SigmaXZ)_alt1"
    // and the string would then be rejected below.
    gammaString = std::regex_replace(gammaString, std::regex("j12_alt1"), "(Gamma5 MinusSigmaYT)");
    gammaString = std::regex_replace(gammaString, std::regex("j12_alt2"), "(Identity GammaYGamma5)");
    // Shorthands for standard baryon operators
    gammaString = std::regex_replace(gammaString, std::regex("j12"), "(Identity SigmaXZ)");
    gammaString = std::regex_replace(gammaString, std::regex("j32X"), "(Identity MinusGammaZGamma5)");
    gammaString = std::regex_replace(gammaString, std::regex("j32Y"), "(Identity GammaT)");
    gammaString = std::regex_replace(gammaString, std::regex("j32Z"), "(Identity GammaXGamma5)");

    // A single gamma matrix
    std::regex rex_g("([0-9a-zA-Z]+)");
    // The full string we expect
    std::regex rex("( *\\(( *\\(([0-9a-zA-Z]+) +([0-9a-zA-Z]+) *\\)){2} *\\) *)+");
    std::smatch sm;

    std::regex_match(gammaString, sm, rex);
    assert(sm[0].matched && "invalid gamma structure.");

    // Collect every gamma name in order of appearance; each group of four
    // consecutive names forms one GammaABPair.
    std::vector<std::string> gS;

    for (auto it = std::sregex_iterator(gammaString.begin(), gammaString.end(), rex_g);
         it != std::sregex_iterator(); ++it)
    {
        gS.push_back(it->str());
    }

    gammaList.resize(gS.size()/4);
    for (std::size_t i = 0; i < gammaList.size(); i++)
    {
        std::vector<Gamma::Algebra> gS1 = strToVec<Gamma::Algebra>(gS[4*i]);
        std::vector<Gamma::Algebra> gS2 = strToVec<Gamma::Algebra>(gS[4*i+1]);
        std::vector<Gamma::Algebra> gS3 = strToVec<Gamma::Algebra>(gS[4*i+2]);
        std::vector<Gamma::Algebra> gS4 = strToVec<Gamma::Algebra>(gS[4*i+3]);

        gammaList[i].first.first   = gS1[0];
        gammaList[i].first.second  = gS2[0];
        gammaList[i].second.first  = gS3[0];
        gammaList[i].second.second = gS4[0];
    }
}
// setup ///////////////////////////////////////////////////////////////////////
// Registers the two temporary LatticeComplex objects, "c" and "c2", that
// execute() later retrieves with envGetTmp.
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TBaryon<FImpl1, FImpl2, FImpl3>::setup(void)
{
envTmpLat(LatticeComplex, "c");
envTmpLat(LatticeComplex, "c2");
}
// execution ///////////////////////////////////////////////////////////////////
/*
 * Compute one baryon correlator per entry of the parsed gamma list and save
 * the whole set under par().output.
 *
 * For every gamma structure the correlator is
 *     sum_{iQ1,iQ2} prefactors[iQ1]*prefactors[iQ2] * <quarks[iQ1]|quarks[iQ2]>
 *
 * Two input modes:
 *  - all three quark objects are sliced propagators: the contraction is done
 *    timeslice by timeslice with ContractBaryons_Sliced;
 *  - otherwise they are read as full propagator fields, contracted on the
 *    lattice, and reduced with the sink. The sink is handled according to
 *    the namespace of the module that produced it ("MSource" or "MSink").
 *
 * NOTE(review): the contractions go through BaryonUtils<FIMPL> rather than
 * the FImpl1/2/3 template parameters; confirm this is intended if the module
 * is ever instantiated with differing implementations.
 */
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void)
{
    std::vector<std::string> quarks     = strToVec<std::string>(par().quarks);
    std::vector<double>      prefactors = strToVec<double>(par().prefactors);
    int                      nQ         = quarks.size();
    // An empty parity string selects the value 1.
    const int parity {par().parity.size()>0 ? std::stoi(par().parity) : 1};
    std::vector<GammaABPair> gammaList;

    parseGammaString(gammaList);

    assert(prefactors.size()==nQ && "number of prefactors needs to match number of quark-structures.");
    for (int iQ = 0; iQ < nQ; iQ++)
    {
        assert(quarks[iQ].size()==3 && "quark-structures must consist of 3 quarks each.");
    }

    LOG(Message) << "Computing baryon contractions '" << getName() << "'" << std::endl;
    for (int iQ1 = 0; iQ1 < nQ; iQ1++)
    {
        for (int iQ2 = 0; iQ2 < nQ; iQ2++)
        {
            LOG(Message) << prefactors[iQ1]*prefactors[iQ2] << "*<" << quarks[iQ1] << "|" << quarks[iQ2] << ">" << std::endl;
        }
    }
    LOG(Message) << " using quarks " << par().q1 << "', " << par().q2 << "', and '" << par().q3 << std::endl;
    for (int iG = 0; iG < gammaList.size(); iG++)
    {
        LOG(Message) << "' with (Gamma^A,Gamma^B)_left = ( " << gammaList[iG].first.first << " , " << gammaList[iG].first.second << "') and (Gamma^A,Gamma^B)_right = ( " << gammaList[iG].second.first << " , " << gammaList[iG].second.second << ")" << std::endl;
    }
    LOG(Message) << "and parity " << parity << " using sink " << par().sink << "." << std::endl;

    envGetTmp(LatticeComplex, c);
    envGetTmp(LatticeComplex, c2);
    int                   nt = env().getDim(Tp);
    std::vector<TComplex> buf;
    TComplex              cs;
    TComplex              ch;
    std::vector<Result>   result;
    Result                r;

    r.info.parity     = parity;
    r.info.quarks     = par().quarks;
    r.info.prefactors = par().prefactors;

    if (envHasType(SlicedPropagator1, par().q1) and
        envHasType(SlicedPropagator2, par().q2) and
        envHasType(SlicedPropagator3, par().q3))
    {
        auto &q1 = envGet(SlicedPropagator1, par().q1);
        auto &q2 = envGet(SlicedPropagator2, par().q2);
        auto &q3 = envGet(SlicedPropagator3, par().q3);

        for (unsigned int i = 0; i < gammaList.size(); ++i)
        {
            r.info.gammaA_left  = gammaList[i].first.first;
            r.info.gammaB_left  = gammaList[i].first.second;
            r.info.gammaA_right = gammaList[i].second.first;
            r.info.gammaB_right = gammaList[i].second.second;

            Gamma gAl(gammaList[i].first.first);
            Gamma gBl(gammaList[i].first.second);
            Gamma gAr(gammaList[i].second.first);
            Gamma gBr(gammaList[i].second.second);

            LOG(Message) << "(propagator already sinked)" << std::endl;
            r.corr.clear();
            // Loop over the time extent: buf is never filled in this branch,
            // so bounding the loop by buf.size() would skip every timeslice
            // and leave the correlator empty.
            for (int t = 0; t < nt; ++t)
            {
                cs = Zero();
                for (int iQ1 = 0; iQ1 < nQ; iQ1++)
                {
                    for (int iQ2 = 0; iQ2 < nQ; iQ2++)
                    {
                        BaryonUtils<FIMPL>::ContractBaryons_Sliced(q1[t],q2[t],q3[t],gAl,gBl,gAr,gBr,quarks[iQ1].c_str(),quarks[iQ2].c_str(),parity,ch);
                        cs += prefactors[iQ1]*prefactors[iQ2]*ch;
                    }
                }
                r.corr.push_back(TensorRemove(cs));
            }
            result.push_back(r);
        }
    }
    else
    {
        auto &q1 = envGet(PropagatorField1, par().q1);
        auto &q2 = envGet(PropagatorField2, par().q2);
        auto &q3 = envGet(PropagatorField3, par().q3);

        // The kind of sink does not depend on the gamma structure, so it is
        // looked up once. Any other namespace leaves buf empty, which yields
        // empty correlators exactly as before.
        const std::string ns        = vm().getModuleNamespace(env().getObjectModule(par().sink));
        const bool        fieldSink = (ns == "MSource");
        const bool        fnSink    = (ns == "MSink");

        for (unsigned int i = 0; i < gammaList.size(); ++i)
        {
            r.info.gammaA_left  = gammaList[i].first.first;
            r.info.gammaB_left  = gammaList[i].first.second;
            r.info.gammaA_right = gammaList[i].second.first;
            r.info.gammaB_right = gammaList[i].second.second;

            Gamma gAl(gammaList[i].first.first);
            Gamma gBl(gammaList[i].first.second);
            Gamma gAr(gammaList[i].second.first);
            Gamma gBr(gammaList[i].second.second);

            if (fieldSink or fnSink)
            {
                // Weighted sum over all pairs of quark structures.
                c = Zero();
                for (int iQ1 = 0; iQ1 < nQ; iQ1++)
                {
                    for (int iQ2 = 0; iQ2 < nQ; iQ2++)
                    {
                        BaryonUtils<FIMPL>::ContractBaryons(q1,q2,q3,gAl,gBl,gAr,gBr,quarks[iQ1].c_str(),quarks[iQ2].c_str(),parity,c2);
                        c += prefactors[iQ1]*prefactors[iQ2]*c2;
                    }
                }
            }
            if (fieldSink)
            {
                // Sink given as a field: multiply, trace, sum per timeslice.
                PropagatorField1 &sink = envGet(PropagatorField1, par().sink);
                auto test = closure(trace(sink*c));

                sliceSum(test, buf, Tp);
            }
            else if (fnSink)
            {
                // Sink given as a function object returning one value per timeslice.
                SinkFnScalar &sink = envGet(SinkFnScalar, par().sink);

                buf = sink(c);
            }

            r.corr.clear();
            for (unsigned int t = 0; t < buf.size(); ++t)
            {
                r.corr.push_back(TensorRemove(buf[t]));
            }
            result.push_back(r);
        }
    }

    saveResult(par().output, "baryon", result);
}
END_MODULE_NAMESPACE

View File

@ -57,7 +57,8 @@ BEGIN_HADRONS_NAMESPACE
* - q1: sink smeared propagator, source at i
* - q2: propagator, source at i
* - q3: propagator, source at f
* - gamma: gamma matrices to insert
* (space-separated strings e.g. "GammaT GammaX GammaY", or "all")
* - tSnk: sink position for propagator q1.
*
*/
@ -71,12 +72,12 @@ class Gamma3ptPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Gamma3ptPar,
std::string, q1,
std::string, q2,
std::string, q3,
Gamma::Algebra, gamma,
unsigned int, tSnk,
std::string, output);
std::string, q1,
std::string, q2,
std::string, q3,
std::string, gamma,
unsigned int, tSnk,
std::string, output);
};
template <typename FImpl1, typename FImpl2, typename FImpl3>
@ -100,6 +101,7 @@ public:
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
virtual void parseGammaString(std::vector<Gamma::Algebra> &gammaList);
protected:
// setup
virtual void setup(void);
@ -142,37 +144,67 @@ void TGamma3pt<FImpl1, FImpl2, FImpl3>::setup(void)
envTmpLat(LatticeComplex, "c");
}
/*
 * Fill gammaList with the gamma insertions requested by par().gamma.
 *
 * The literal value "all" selects every second Gamma::Algebra value starting
 * from 1, up to Gamma::nGamma; any other string is handed to strToVec and
 * parsed as a list of Gamma::Algebra names.
 */
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TGamma3pt<FImpl1, FImpl2, FImpl3>::parseGammaString(std::vector<Gamma::Algebra> &gammaList)
{
    gammaList.clear();

    if (par().gamma != "all")
    {
        // Explicit list of insertions given by the user.
        gammaList = strToVec<Gamma::Algebra>(par().gamma);
        return;
    }

    // "all": enumerate the odd-numbered algebra entries.
    for (unsigned int idx = 1; idx < Gamma::nGamma; idx += 2)
    {
        gammaList.push_back(static_cast<Gamma::Algebra>(idx));
    }
}
// execution ///////////////////////////////////////////////////////////////////
/*
 * For every requested gamma insertion, compute
 *     trace(g5 * q1[tSnk] * adj(q2) * (g5 * gamma) * q3)
 * summed over each timeslice, and save the list of correlators under
 * par().output.
 *
 * NOTE(review): q3 is fetched as PropagatorField2 although the module is
 * templated on a third implementation; confirm whether PropagatorField3 was
 * intended.
 */
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TGamma3pt<FImpl1, FImpl2, FImpl3>::execute(void)
{
    LOG(Message) << "Computing 3pt contractions '" << getName() << "' using"
                 << " quarks '" << par().q1 << "', '" << par().q2 << "' and '"
                 << par().q3 << "', with " << par().gamma << " insertions."
                 << std::endl;

    // Initialise variables. q2 and q3 are normal propagators, q1 may be
    // sink smeared.
    auto                        &q1 = envGet(SlicedPropagator1, par().q1);
    auto                        &q2 = envGet(PropagatorField2, par().q2);
    auto                        &q3 = envGet(PropagatorField2, par().q3);
    Gamma                       g5(Gamma::Algebra::Gamma5);
    std::vector<Gamma::Algebra> gammaList;
    std::vector<TComplex>       buf;
    std::vector<Result>         result;
    int                         nt = env().getDim(Tp);

    // One result entry per insertion, each with nt time entries.
    parseGammaString(gammaList);
    result.resize(gammaList.size());
    for (unsigned int i = 0; i < result.size(); ++i)
    {
        result[i].gamma = gammaList[i];
        result[i].corr.resize(nt);
    }

    // Extract relevant timeslice of sinked propagator q1, then contract &
    // sum over all spatial positions of gamma insertion.
    SitePropagator1 q1Snk = q1[par().tSnk];
    envGetTmp(LatticeComplex, c);

    for (unsigned int i = 0; i < result.size(); ++i)
    {
        Gamma gamma(gammaList[i]);

        c = trace(g5*q1Snk*adj(q2)*(g5*gamma)*q3);
        sliceSum(c, buf, Tp);
        for (unsigned int t = 0; t < buf.size(); ++t)
        {
            result[i].corr[t] = TensorRemove(buf[t]);
        }
    }

    saveResult(par().output, "gamma3pt", result);
}

View File

@ -199,7 +199,7 @@ void TMeson<FImpl1, FImpl2>::execute(void)
Gamma gSnk(gammaList[i].first);
Gamma gSrc(gammaList[i].second);
for (unsigned int t = 0; t < buf.size(); ++t)
for (unsigned int t = 0; t < nt; ++t)
{
result[i].corr[t] = TensorRemove(trace(mesonConnected(q1[t], q2[t], gSnk, gSrc)));
}

View File

@ -0,0 +1,7 @@
#include <Hadrons/Modules/MContraction/SigmaToNucleonEye.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MContraction;
// Explicit instantiation of the module template for FIMPL, the same
// implementation the header registers through MODULE_REGISTER_TMP.
template class Grid::Hadrons::MContraction::TSigmaToNucleonEye<FIMPL>;

View File

@ -0,0 +1,218 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MContraction/SigmaToNucleonEye.hpp
Copyright (C) 2015-2019
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Felix Erben <felix.erben@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MContraction_SigmaToNucleonEye_hpp_
#define Hadrons_MContraction_SigmaToNucleonEye_hpp_
#include <Hadrons/Global.hpp>
#include <Hadrons/Module.hpp>
#include <Hadrons/ModuleFactory.hpp>
#include <Grid/qcd/utils/BaryonUtils.h>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* SigmaToNucleonEye *
******************************************************************************/
/*
* Sigma-to-nucleon 3-pt diagrams, eye topologies.
*
* Schematics: qqLoop |
* /->-¬ |
* / \ | qsTi G qdTf
* \ / | /---->------*------>----¬
* qsTi \ / qdTf | / /-*-¬ \
* /----->-----* *----->----¬ | / / G \ \
* * G G * | * \ / qqLoop *
* |\ /| | |\ \-<-/ /|
* | \ / | | | \ / |
* | \---------->---------/ | | | \----------->----------/ |
* \ quSpec / | \ quSpec /
* \ / | \ /
* \---------->---------/ | \----------->----------/
* quSpec | quSpec
*
* analogously to the rare-kaon naming, the left diagram is named 'one-trace' and
* the diagram on the right 'two-trace'
*
* Propagators:
* * qqLoop
* * quSpec, source at ti
* * qdTf, source at tf
* * qsTi, source at ti
*/
BEGIN_MODULE_NAMESPACE(MContraction)
// Parameters of the SigmaToNucleonEye module.
//   qqLoop : name of the loop propagator
//   quSpec : name of the spectator propagator (source at ti), read as a
//            sliced propagator by execute()
//   qdTf   : name of the propagator with source at tf
//   qsTi   : name of the propagator with source at ti
//   tf     : index of the quSpec timeslice used in the contraction
//   sink   : name of the sink object; listed as an input and logged
//   output : result name passed to saveResult
class SigmaToNucleonEyePar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(SigmaToNucleonEyePar,
std::string, qqLoop,
std::string, quSpec,
std::string, qdTf,
std::string, qsTi,
unsigned int, tf,
std::string, sink,
std::string, output);
};
// Module computing the sigma-to-nucleon eye-topology contractions for every
// gamma matrix in Gamma::gall, for the two operators labelled "Q1" and "Q2".
template <typename FImpl>
class TSigmaToNucleonEye: public Module<SigmaToNucleonEyePar>
{
public:
FERM_TYPE_ALIASES(FImpl,);
BASIC_TYPE_ALIASES(ScalarImplCR, Scalar);
SINK_TYPE_ALIASES(Scalar);
// Per-site spin-matrix type; one value per timeslice is stored in the result.
typedef typename SpinMatrixField::vector_object::scalar_object SpinMatrix;
// Metadata saved with each correlator:
//   gammaH                 - gamma matrix of the current loop iteration
//   gammaA/BSigma, ...Nucl - fixed to Identity / SigmaXZ by execute()
//   trace                  - 2 for operator "Q1", 1 for operator "Q2"
class Metadata: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Metadata,
Gamma::Algebra, gammaH,
Gamma::Algebra, gammaASigma,
Gamma::Algebra, gammaBSigma,
Gamma::Algebra, gammaANucl,
Gamma::Algebra, gammaBNucl,
int, trace);
};
typedef Correlator<Metadata, SpinMatrix> Result;
public:
// constructor
TSigmaToNucleonEye(const std::string name);
// destructor
virtual ~TSigmaToNucleonEye(void) {};
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
virtual void execute(void);
// Which gamma algebra was specified
// NOTE(review): not referenced by any method of this module visible here.
Gamma::Algebra al;
};
MODULE_REGISTER_TMP(SigmaToNucleonEye, ARG(TSigmaToNucleonEye<FIMPL>), MContraction);
/******************************************************************************
* TSigmaToNucleonEye implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
// Forwards the module name to the Module base class; no other state is set.
template <typename FImpl>
TSigmaToNucleonEye<FImpl>::TSigmaToNucleonEye(const std::string name)
: Module<SigmaToNucleonEyePar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
// Objects this module depends on: the four propagators and the sink.
template <typename FImpl>
std::vector<std::string> TSigmaToNucleonEye<FImpl>::getInput(void)
{
    return {par().qqLoop, par().quSpec, par().qdTf, par().qsTi, par().sink};
}
// The module declares no output objects (always returns an empty list).
template <typename FImpl>
std::vector<std::string> TSigmaToNucleonEye<FImpl>::getOutput(void)
{
    return {};
}
// setup ///////////////////////////////////////////////////////////////////////
// Registers the temporary SpinMatrixField "c" that execute() retrieves with
// envGetTmp and uses as the contraction output.
template <typename FImpl>
void TSigmaToNucleonEye<FImpl>::setup(void)
{
envTmpLat(SpinMatrixField, "c");
}
// execution ///////////////////////////////////////////////////////////////////
/*
 * For every gamma matrix G in Gamma::gall, run the eye contraction for the
 * operators "Q1" and "Q2", sum the resulting spin-matrix field over each
 * timeslice and append one correlator per (G, operator) to the result list,
 * in the order Q1 then Q2. The list is saved under par().output.
 *
 * The sigma and nucleon gamma structures are fixed to (Identity, SigmaXZ).
 * par().sink is only reported in the log by this function.
 */
template <typename FImpl>
void TSigmaToNucleonEye<FImpl>::execute(void)
{
    const Gamma GammaB(Gamma::Algebra::SigmaXZ); // C*gamma_5
    const Gamma Id(Gamma::Algebra::Identity);    // identity

    LOG(Message) << "Computing sigma-to-nucleon contractions '" << getName() << "'" << std::endl;
    LOG(Message) << "' with (Gamma^A,Gamma^B)_sigma = ( Identity, C*gamma_5 ) and (Gamma^A,Gamma^B)_nucl = ( Identity, C*gamma_5 )" << std::endl;
    LOG(Message) << " using sink " << par().sink << "." << std::endl;

    envGetTmp(SpinMatrixField, c);
    std::vector<SpinMatrix> buf;
    std::vector<Result>     result;
    Result                  r;

    r.info.gammaASigma = Id.g;
    r.info.gammaBSigma = GammaB.g;
    r.info.gammaANucl  = Id.g;
    r.info.gammaBNucl  = GammaB.g;

    auto &qqLoop = envGet(PropagatorField, par().qqLoop);
    auto &quSpec = envGet(SlicedPropagator, par().quSpec);
    auto &qdTf   = envGet(PropagatorField, par().qdTf);
    auto &qsTi   = envGet(PropagatorField, par().qsTi);
    // Spectator propagator on the tf timeslice.
    auto qut     = quSpec[par().tf];

    // Operator Q1 is equivalent to the two-trace case in the rare-kaons
    // module, operator Q2 to the one-trace case.
    const char *const opName[2]  = {"Q1", "Q2"};
    const int         opTrace[2] = {2, 1};

    for (auto &G: Gamma::gall)
    {
        r.info.gammaH = G.g;
        for (int iOp = 0; iOp < 2; ++iOp)
        {
            c = Zero();
            // Use the module's own implementation type so the utilities
            // always match the field types declared from FImpl.
            BaryonUtils<FImpl>::Sigma_to_Nucleon_Eye(qqLoop,qut,qdTf,qsTi,G,GammaB,GammaB,opName[iOp],c);
            sliceSum(c,buf,Tp);
            r.corr.clear();
            for (unsigned int t = 0; t < buf.size(); ++t)
            {
                r.corr.push_back(buf[t]);
            }
            r.info.trace = opTrace[iOp];
            result.push_back(r);
        }
    }

    saveResult(par().output, "stnEye", result);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MContraction_SigmaToNucleonEye_hpp_

View File

@ -0,0 +1,7 @@
#include <Hadrons/Modules/MContraction/SigmaToNucleonNonEye.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MContraction;
// Explicit instantiation of the module template for FIMPL, the same
// implementation the header registers through MODULE_REGISTER_TMP.
template class Grid::Hadrons::MContraction::TSigmaToNucleonNonEye<FIMPL>;

View File

@ -0,0 +1,224 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MContraction/SigmaToNucleonNonEye.hpp
Copyright (C) 2015-2019
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Felix Erben <felix.erben@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MContraction_SigmaToNucleonNonEye_hpp_
#define Hadrons_MContraction_SigmaToNucleonNonEye_hpp_
#include <Hadrons/Global.hpp>
#include <Hadrons/Module.hpp>
#include <Hadrons/ModuleFactory.hpp>
#include <Grid/qcd/utils/BaryonUtils.h>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* SigmaToNucleonNonEye *
******************************************************************************/
/*
* Sigma-to-Nucleon 3-pt diagrams, non-eye topologies.
*
* Schematic:
* qsTi quTf | qsTi qdTf
* /-->--¬ /-->--¬ | /-->--¬ /-->--¬
* / \ / \ | / \ / \
* / \ / \ | / \ / \
* / \ / \ | / \ / \
* * * G * | * G * * G *
* |\ * G | | |\ / \ /|
* | \ / \ /| | | \ / \ / |
* | \ / \ / | | | \ / \ / |
* | \ / \ / | | | \-->--/ \-->--/ |
* \ \-->--/ \-->--/ / | \ quTi quTf /
* \ quTi qdTf / | \ /
* \ / | \ /
* \--------->----------/ | \--------->-----------/
* quSpec | quSpec
*
*
* analogously to the rare-kaon naming, the left diagram is named 'one-trace' and
* the diagram on the right 'two-trace'
*
* Propagators:
* * quTi, source at ti
* * quTf, source at tf
* * quSpec, source at ti
* * qdTf, source at tf
* * qsTi, source at ti
*/
BEGIN_MODULE_NAMESPACE(MContraction)
// Parameters of the SigmaToNucleonNonEye module.
//   quTi   : name of the propagator with source at ti
//   quTf   : name of the propagator with source at tf
//   quSpec : name of the spectator propagator (source at ti), read as a
//            sliced propagator by execute()
//   qdTf   : name of the propagator with source at tf
//   qsTi   : name of the propagator with source at ti
//   tf     : index of the quSpec timeslice used in the contraction
//   sink   : name of the sink object; listed as an input and logged
//   output : result name passed to saveResult
class SigmaToNucleonNonEyePar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(SigmaToNucleonNonEyePar,
std::string, quTi,
std::string, quTf,
std::string, quSpec,
std::string, qdTf,
std::string, qsTi,
unsigned int, tf,
std::string, sink,
std::string, output);
};
// Module computing the sigma-to-nucleon non-eye contractions for every gamma
// matrix in Gamma::gall, for the two operators labelled "Q1" and "Q2".
template <typename FImpl>
class TSigmaToNucleonNonEye: public Module<SigmaToNucleonNonEyePar>
{
public:
FERM_TYPE_ALIASES(FImpl,);
BASIC_TYPE_ALIASES(ScalarImplCR, Scalar);
SINK_TYPE_ALIASES(Scalar);
// Per-site spin-matrix type; one value per timeslice is stored in the result.
typedef typename SpinMatrixField::vector_object::scalar_object SpinMatrix;
// Metadata saved with each correlator:
//   gammaH                 - gamma matrix of the current loop iteration
//   gammaA/BSigma, ...Nucl - fixed to Identity / SigmaXZ by execute()
//   trace                  - 2 for operator "Q1", 1 for operator "Q2"
class Metadata: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Metadata,
Gamma::Algebra, gammaH,
Gamma::Algebra, gammaASigma,
Gamma::Algebra, gammaBSigma,
Gamma::Algebra, gammaANucl,
Gamma::Algebra, gammaBNucl,
int, trace);
};
typedef Correlator<Metadata, SpinMatrix> Result;
public:
// constructor
TSigmaToNucleonNonEye(const std::string name);
// destructor
virtual ~TSigmaToNucleonNonEye(void) {};
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
virtual void execute(void);
// Which gamma algebra was specified
// NOTE(review): not referenced by any method of this module visible here.
Gamma::Algebra al;
};
MODULE_REGISTER_TMP(SigmaToNucleonNonEye, ARG(TSigmaToNucleonNonEye<FIMPL>), MContraction);
/******************************************************************************
* TSigmaToNucleonNonEye implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
// Forwards the module name to the Module base class; no other state is set.
template <typename FImpl>
TSigmaToNucleonNonEye<FImpl>::TSigmaToNucleonNonEye(const std::string name)
: Module<SigmaToNucleonNonEyePar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
// Objects this module depends on: the five propagators and the sink.
template <typename FImpl>
std::vector<std::string> TSigmaToNucleonNonEye<FImpl>::getInput(void)
{
    return {par().quTi, par().quTf, par().quSpec, par().qdTf, par().qsTi, par().sink};
}
// The module declares no output objects (always returns an empty list).
template <typename FImpl>
std::vector<std::string> TSigmaToNucleonNonEye<FImpl>::getOutput(void)
{
    return {};
}
// setup ///////////////////////////////////////////////////////////////////////
// Registers the temporary SpinMatrixField "c" that execute() retrieves with
// envGetTmp and uses as the contraction output.
template <typename FImpl>
void TSigmaToNucleonNonEye<FImpl>::setup(void)
{
envTmpLat(SpinMatrixField, "c");
}
// execution ///////////////////////////////////////////////////////////////////
/*
 * For every gamma matrix G in Gamma::gall, run the non-eye contraction for
 * the operators "Q1" and "Q2", sum the resulting spin-matrix field over each
 * timeslice and append one correlator per (G, operator) to the result list,
 * in the order Q1 then Q2. The list is saved under par().output.
 *
 * The sigma and nucleon gamma structures are fixed to (Identity, SigmaXZ).
 * par().sink is only reported in the log by this function.
 */
template <typename FImpl>
void TSigmaToNucleonNonEye<FImpl>::execute(void)
{
    const Gamma GammaB(Gamma::Algebra::SigmaXZ); // C*gamma_5
    const Gamma Id(Gamma::Algebra::Identity);    // identity

    LOG(Message) << "Computing sigma-to-nucleon contractions '" << getName() << "'" << std::endl;
    LOG(Message) << "' with (Gamma^A,Gamma^B)_sigma = ( Identity, C*gamma_5 ) and (Gamma^A,Gamma^B)_nucl = ( Identity, C*gamma_5 )" << std::endl;
    LOG(Message) << " using sink " << par().sink << "." << std::endl;

    envGetTmp(SpinMatrixField, c);
    std::vector<SpinMatrix> buf;
    std::vector<Result>     result;
    Result                  r;

    r.info.gammaASigma = Id.g;
    r.info.gammaBSigma = GammaB.g;
    r.info.gammaANucl  = Id.g;
    r.info.gammaBNucl  = GammaB.g;

    auto &quTi   = envGet(PropagatorField, par().quTi);
    auto &quTf   = envGet(PropagatorField, par().quTf);
    auto &quSpec = envGet(SlicedPropagator, par().quSpec);
    auto &qdTf   = envGet(PropagatorField, par().qdTf);
    auto &qsTi   = envGet(PropagatorField, par().qsTi);
    // Spectator propagator on the tf timeslice.
    auto qut     = quSpec[par().tf];

    // Operator Q1 is equivalent to the two-trace case in the rare-kaons
    // module, operator Q2 to the one-trace case.
    const char *const opName[2]  = {"Q1", "Q2"};
    const int         opTrace[2] = {2, 1};

    for (auto &G: Gamma::gall)
    {
        r.info.gammaH = G.g;
        for (int iOp = 0; iOp < 2; ++iOp)
        {
            c = Zero();
            // Use the module's own implementation type so the utilities
            // always match the field types declared from FImpl.
            BaryonUtils<FImpl>::Sigma_to_Nucleon_NonEye(quTi,quTf,qut,qdTf,qsTi,G,GammaB,GammaB,opName[iOp],c);
            sliceSum(c,buf,Tp);
            r.corr.clear();
            for (unsigned int t = 0; t < buf.size(); ++t)
            {
                r.corr.push_back(buf[t]);
            }
            r.info.trace = opTrace[iOp];
            result.push_back(r);
        }
    }

    saveResult(par().output, "stnNonEye", result);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MContraction_SigmaToNucleonNonEye_hpp_

View File

@ -144,7 +144,7 @@ void TWeakEye3pt<FImpl>::execute(void)
{
LOG(Message) << "Computing mesonic weak 3pt contractions, eye topologies" << std::endl;
LOG(Message) << "gIn : " << par().gammaIn << std::endl;
LOG(Message) << "gOut: " << par().gammaIn << std::endl;
LOG(Message) << "gOut: " << par().gammaOut << std::endl;
LOG(Message) << "tOut: " << par().tOut << std::endl;
LOG(Message) << "qbl : " << par().qBarLeft << std::endl;
LOG(Message) << "qbr : " << par().qBarRight << std::endl;

View File

@ -144,7 +144,7 @@ void TWeakNonEye3pt<FImpl>::execute(void)
{
LOG(Message) << "Computing mesonic weak 3pt contractions, non-eye topologies" << std::endl;
LOG(Message) << "gIn : " << par().gammaIn << std::endl;
LOG(Message) << "gOut: " << par().gammaIn << std::endl;
LOG(Message) << "gOut: " << par().gammaOut << std::endl;
LOG(Message) << "ql : " << par().qLeft << std::endl;
LOG(Message) << "qbl : " << par().qBarLeft << std::endl;
LOG(Message) << "qr : " << par().qRight << std::endl;

View File

@ -0,0 +1,124 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MDistil/Distil.hpp
Copyright (C) 2015-2019
Author: Felix Erben <ferben@ed.ac.uk>
Author: Michael Marshall <Michael.Marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MDistil_Distil_hpp_
#define Hadrons_MDistil_Distil_hpp_
#include <Hadrons/NamedTensor.hpp>
#include <Hadrons/Module.hpp>
#include <Hadrons/ModuleFactory.hpp>
#include <Hadrons/Solver.hpp>
#include <Hadrons/A2AVectors.hpp>
#include <Hadrons/DilutedNoise.hpp>
BEGIN_HADRONS_NAMESPACE
BEGIN_MODULE_NAMESPACE(MDistil)
/******************************************************************************
Distillation code that is common across modules
Documentation on how to use this code available at
* https://aportelli.github.io/Hadrons-doc/#/mdistil *
Notation for (stochastic) DistilParameters taken from 1104.3870:
TI is interlaced dilution in time (corresponding to Nt = time-dimension of the lattice)
LI is interlaced dilution in laplacian-eigenvector space (corresponding to nvec)
SI is interlaced dilution in spin (corresponding to Ns, taken from Grid, usually Ns=4)
This code automatically computes perambulators using exact distillation if
* (TI,LI,SI) = (Nt,nvec,Ns) *
In this case, nnoise=1 and Noises is set to an array of values =1 as well.
tsrc then specifies the only timeslice on which the sources are supported.
(( for stochastic distillation, the vaue of tsrc has no meaning in this code ))
******************************************************************************/
// Serializable set of distillation parameters.
//   nvec   : number of Laplacian eigenvectors
//   nnoise : number of noises (1 for exact distillation)
//   tsrc   : source timeslice for exact distillation; stated above to have
//            no meaning for stochastic distillation
//   TI     : interlaced dilution in time
//   LI     : interlaced dilution in Laplacian-eigenvector space
//   SI     : interlaced dilution in spin
struct DistilParameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(DistilParameters,
int, nvec,
int, nnoise,
int, tsrc,
int, TI,
int, LI,
int, SI )
};
/******************************************************************************
Make a lower dimensional grid in preparation for local slice operations
******************************************************************************/
inline void MakeLowerDimGrid( std::unique_ptr<GridCartesian> &up, GridCartesian * gridHD )
{
int nd{static_cast<int>(gridHD->_ndimension)};
Coordinate latt_size = gridHD->_gdimensions;
latt_size[nd-1] = 1;
Coordinate simd_layout = GridDefaultSimd(nd-1, vComplex::Nsimd());
simd_layout.push_back( 1 );
Coordinate mpi_layout = gridHD->_processors;
mpi_layout[nd-1] = 1;
up.reset( new GridCartesian(latt_size,simd_layout,mpi_layout,*gridHD) );
}
/*************************************************************************************
Rotate eigenvectors into our phase convention
First component of first eigenvector is real and positive
*************************************************************************************/
inline void RotateEigen(std::vector<LatticeColourVector> & evec)
{
ColourVector cv0;
auto grid = evec[0].Grid();
Coordinate siteFirst(grid->Nd(),0);
peekSite(cv0, evec[0], siteFirst);
const std::complex<Real> cplx0{cv0()()(0).real(), cv0()()(0).imag()};
if( cplx0.imag() == 0 )
LOG(Message) << "RotateEigen() : Site 0 : " << cplx0 << " => already meets phase convention" << std::endl;
else
{
const Real cplx0_mag{ std::abs(cplx0) };
const std::complex<Real> std_phase{std::conj(cplx0/cplx0_mag)};
LOG(Message) << "RotateEigen() : Site 0 : |" << cplx0 << "|=" << cplx0_mag
<< " => phase=" << (std::arg(std_phase) / M_PI) << " pi" << std::endl;
{
const Grid::Complex phase{std_phase.real(),std_phase.imag()};
for( int k = 0 ; k < evec.size() ; k++ )
evec[k] *= phase;
// Get rid of the rounding error in imaginary phase on the very first site
peekSite(cv0, evec[0], siteFirst);
cv0()()(0).imag(0); // this should be zero after the phase multiply - force it to be so
pokeSite(cv0, evec[0], siteFirst);
}
}
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif

View File

@ -0,0 +1,7 @@
#include <Hadrons/Modules/MDistil/DistilPar.hpp>
// Bring the Grid, Hadrons and MDistil namespaces into scope for this translation unit
using namespace Grid;
using namespace Hadrons;
using namespace MDistil;
// Explicit instantiation of the module template for FIMPL
template class Grid::Hadrons::MDistil::TDistilPar<FIMPL>;

View File

@ -0,0 +1,97 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MDistil/DistilPar.hpp
Copyright (C) 2019
Author: Felix Erben <ferben@ed.ac.uk>
Author: Michael Marshall <Michael.Marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MDistil_DistilPar_hpp_
#define Hadrons_MDistil_DistilPar_hpp_
#include <Hadrons/Modules/MDistil/Distil.hpp>
BEGIN_HADRONS_NAMESPACE
BEGIN_MODULE_NAMESPACE(MDistil)
/******************************************************************************
* DistilPar *
******************************************************************************/
// Module whose parameters are a DistilParameters object
template <typename FImpl>
class TDistilPar: public Module<DistilParameters>
{
public:
  // construction / destruction
  TDistilPar(const std::string name);
  virtual ~TDistilPar() {}
  // names of the objects this module depends on / produces
  virtual std::vector<std::string> getInput();
  virtual std::vector<std::string> getOutput();
  // object creation
  virtual void setup();
  // module body
  virtual void execute();
};
MODULE_REGISTER_TMP(DistilPar, TDistilPar<FIMPL>, MDistil);
/******************************************************************************
* TDistilPar implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
// Forwards the module name to the Module base class; no other state to initialise
template <typename FImpl>
TDistilPar<FImpl>::TDistilPar(const std::string name)
: Module<DistilParameters>(name)
{
}
// dependencies/products ///////////////////////////////////////////////////////
// This module has no inputs: the returned list is always empty
template <typename FImpl>
std::vector<std::string> TDistilPar<FImpl>::getInput(void)
{
  std::vector<std::string> in;
  return in;
}
// The single output carries the module's own name
template <typename FImpl>
std::vector<std::string> TDistilPar<FImpl>::getOutput(void)
{
  std::vector<std::string> out;
  out.push_back(getName());
  return out;
}
// setup ///////////////////////////////////////////////////////////////////////
// Creates the output object: a DistilParameters, registered under the module's
// name and constructed from this module's parameters (par()).
template <typename FImpl>
void TDistilPar<FImpl>::setup(void)
{
envCreate(DistilParameters, getName(), 1, par() );
}
// execution ///////////////////////////////////////////////////////////////////
// Intentionally empty: all the work of this module is done in setup()
template <typename FImpl>
void TDistilPar<FImpl>::execute(void)
{
// Nothing to do. setup() created and initialised the output object
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif

View File

@ -0,0 +1,36 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MDistil/DistilVectors.cc
Copyright (C) 2019
Author: Felix Erben <ferben@ed.ac.uk>
Author: Michael Marshall <Michael.Marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/Modules/MDistil/DistilVectors.hpp>
// Bring the Grid, Hadrons and MDistil namespaces into scope for this translation unit
using namespace Grid;
using namespace Hadrons;
using namespace MDistil;
// Explicit instantiation of the module template for FIMPL
template class Grid::Hadrons::MDistil::TDistilVectors<FIMPL>;

View File

@ -0,0 +1,243 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MDistil/DistilVectors.hpp
Copyright (C) 2019
Author: Felix Erben <ferben@ed.ac.uk>
Author: Michael Marshall <Michael.Marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MDistil_DistilVectors_hpp_
#define Hadrons_MDistil_DistilVectors_hpp_
#include <Hadrons/Modules/MDistil/Distil.hpp>
BEGIN_HADRONS_NAMESPACE
BEGIN_MODULE_NAMESPACE(MDistil)
/******************************************************************************
* DistilVectors *
* (Create rho and/or phi vectors) *
******************************************************************************/
// Parameters of the DistilVectors module. All members are object names:
//   noise, perambulator, lapevec, DistilParams - inputs (see getInput())
//   rho, phi                                   - outputs; either may be left empty
//                                                to skip it, but not both (see getOutput())
class DistilVectorsPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(DistilVectorsPar,
std::string, noise,
std::string, perambulator,
std::string, lapevec,
std::string, rho,
std::string, phi,
std::string, DistilParams);
};
// Module creating the rho and/or phi distillation vectors
template <typename FImpl>
class TDistilVectors: public Module<DistilVectorsPar>
{
public:
  FERM_TYPE_ALIASES(FImpl,);
  // construction / destruction
  TDistilVectors(const std::string name);
  virtual ~TDistilVectors() {}
  // names of the objects this module depends on / produces
  virtual std::vector<std::string> getInput();
  virtual std::vector<std::string> getOutput();
  // object creation
  virtual void setup();
  // module body
  virtual void execute();
protected:
  // Lower dimensional grid built in setup(); released automatically by the unique_ptr
  std::unique_ptr<GridCartesian> grid3d;
public:
  // Output names, copied from the parameters by getOutput()
  std::string RhoName;
  std::string PhiName;
};
MODULE_REGISTER_TMP(DistilVectors, TDistilVectors<FIMPL>, MDistil);
/******************************************************************************
* TDistilVectors implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
// Forwards the module name to the Module base class; members are default-initialised
template <typename FImpl>
TDistilVectors<FImpl>::TDistilVectors(const std::string name)
: Module<DistilVectorsPar>(name)
{
}
// dependencies/products ///////////////////////////////////////////////////////
// Inputs, in order: noise, perambulator, Laplacian eigenvectors, distillation parameters
template <typename FImpl>
std::vector<std::string> TDistilVectors<FImpl>::getInput(void)
{
  const auto &p = par();
  std::vector<std::string> in;
  in.reserve(4);
  in.push_back(p.noise);
  in.push_back(p.perambulator);
  in.push_back(p.lapevec);
  in.push_back(p.DistilParams);
  return in;
}
// Returns the non-empty output names (rho first, then phi).
// Side effect: caches both names in RhoName / PhiName for setup() and execute().
// Raises an Argument error when neither output has been requested.
template <typename FImpl>
std::vector<std::string> TDistilVectors<FImpl>::getOutput(void)
{
  RhoName = par().rho;
  PhiName = par().phi;
  const bool wantRho = !RhoName.empty();
  const bool wantPhi = !PhiName.empty();
  if (!(wantRho || wantPhi))
  {
    HADRONS_ERROR(Argument,"No output specified");
  }
  std::vector<std::string> out;
  if (wantRho)
  {
    out.push_back(RhoName);
  }
  if (wantPhi)
  {
    out.push_back(PhiName);
  }
  return out;
}
// setup ///////////////////////////////////////////////////////////////////////
// Validates the perambulator, creates the requested output vectors and the
// temporaries used by execute().
// Uses RhoName / PhiName, which are filled in by getOutput().
// Raises a Range error if the perambulator index names do not validate.
template <typename FImpl>
void TDistilVectors<FImpl>::setup(void)
{
  // We expect the perambulator to have been created with these indices
  auto &perambulator = envGet(PerambTensor, par().perambulator);
  if (!perambulator.ValidateIndexNames())
  {
    HADRONS_ERROR(Range,"Perambulator index names bad");
  }
  const DistilParameters &dp{envGet(DistilParameters, par().DistilParams)};
  const int Nt{env().getDim(Tdir)};
  // With full time dilution (TI == Nt) only one time block is stored
  const bool full_tdil{ dp.TI == Nt };
  const int Nt_inv{ full_tdil ? 1 : dp.TI };
  // One fermion field per (noise, eigenvector block, spin block, time block)
  if (!RhoName.empty())
    envCreate(std::vector<FermionField>, RhoName, 1, dp.nnoise*dp.LI*dp.SI*Nt_inv, envGetGrid(FermionField));
  if (!PhiName.empty())
    envCreate(std::vector<FermionField>, PhiName, 1, dp.nnoise*dp.LI*dp.SI*Nt_inv, envGetGrid(FermionField));
  // Lower dimensional grid for single-timeslice objects. MakeLowerDimGrid()
  // derives the sizes and layouts itself, so no local copies are needed here.
  GridCartesian * const grid4d{env().getGrid()};
  MakeLowerDimGrid(grid3d, grid4d);
  envTmp(LatticeSpinColourVector, "source4d",1,LatticeSpinColourVector(grid4d));
  envTmp(LatticeSpinColourVector, "source3d",1,LatticeSpinColourVector(grid3d.get()));
  envTmp(LatticeColourVector, "source3d_nospin",1,LatticeColourVector(grid3d.get()));
  envTmp(LatticeSpinColourVector, "sink3d",1,LatticeSpinColourVector(grid3d.get()));
  envTmp(LatticeColourVector, "evec3d",1,LatticeColourVector(grid3d.get()));
}
// execution ///////////////////////////////////////////////////////////////////
// Fills the requested output vectors.
//   rho : for each (noise, eigenvector block dk, time block dt, spin block ds),
//         the sum over the diluted (t, eigenvector, spin) indices of
//         eigenvector slice * noise, placed in the matching spin component and timeslice.
//   phi : for each of the same indices and each local timeslice, the sum over
//         eigenvectors of eigenvector slice * perambulator element.
// Only timeslices held by this node are touched.
template <typename FImpl>
void TDistilVectors<FImpl>::execute(void)
{
  auto &noise = envGet(NoiseTensor, par().noise);
  auto &perambulator = envGet(PerambTensor, par().perambulator);
  auto &epack = envGet(Grid::Hadrons::EigenPack<LatticeColourVector>, par().lapevec);
  const DistilParameters &dp{envGet(DistilParameters, par().DistilParams)};
  envGetTmp(LatticeSpinColourVector, source4d);
  envGetTmp(LatticeSpinColourVector, source3d);
  envGetTmp(LatticeColourVector, source3d_nospin);
  envGetTmp(LatticeSpinColourVector, sink3d);
  envGetTmp(LatticeColourVector, evec3d);
  GridCartesian * const grid4d{env().getGrid()};
  // Local extent and offset of the time direction (Tdir, as used for the slicing below)
  const int Ntlocal{ grid4d->LocalDimensions()[Tdir] };
  const int Ntfirst{ grid4d->LocalStarts()[Tdir] };
  const int Nt{env().getDim(Tdir)};
  const bool full_tdil{ dp.TI == Nt };
  const int Nt_inv{ full_tdil ? 1 : dp.TI };

  if (!RhoName.empty())
  {
    auto &rho = envGet(std::vector<FermionField>, RhoName);
    for (int inoise = 0; inoise < dp.nnoise; inoise++)
    {
      for (int dk = 0; dk < dp.LI; dk++)
      {
        for (int dt = 0; dt < Nt_inv; dt++)
        {
          for (int ds = 0; ds < dp.SI; ds++)
          {
            const int vecindex{ inoise + dp.nnoise * (dk + dp.LI * (ds + dp.SI * dt)) };
            rho[vecindex] = 0;
            for (int it = dt; it < Nt; it += dp.TI)
            {
              // With full time dilution the source lives on tsrc only
              const int t_inv{full_tdil ? dp.tsrc : it};
              if (t_inv >= Ntfirst && t_inv < Ntfirst + Ntlocal)
              {
                for (int ik = dk; ik < dp.nvec; ik += dp.LI)
                {
                  // The eigenvector slice does not depend on spin: extract it once per (t,ik)
                  ExtractSliceLocal(evec3d,epack.evec[ik],0,t_inv-Ntfirst,Tdir);
                  for (int is = ds; is < Ns; is += dp.SI)
                  {
                    source3d_nospin = evec3d * noise.tensor(inoise, t_inv, ik, is);
                    source3d=0;
                    pokeSpin(source3d,source3d_nospin,is);
                    source4d=0;
                    InsertSliceLocal(source3d,source4d,0,t_inv-Ntfirst,Tdir);
                    rho[vecindex] += source4d;
                  }
                }
              }
            }
          }
        }
      }
    }
  }

  if (!PhiName.empty())
  {
    auto &phi = envGet(std::vector<FermionField>, PhiName);
    for (int inoise = 0; inoise < dp.nnoise; inoise++)
    {
      for (int dk = 0; dk < dp.LI; dk++)
      {
        for (int dt = 0; dt < Nt_inv; dt++)
        {
          for (int ds = 0; ds < dp.SI; ds++)
          {
            const int vecindex{ inoise + dp.nnoise * (dk + dp.LI * (ds + dp.SI * dt)) };
            phi[vecindex] = 0;
            for (int t = Ntfirst; t < Ntfirst + Ntlocal; t++)
            {
              sink3d=0;
              for (int ivec = 0; ivec < dp.nvec; ivec++)
              {
                ExtractSliceLocal(evec3d,epack.evec[ivec],0,t-Ntfirst,Tdir);
                sink3d += evec3d * perambulator.tensor(t, ivec, dk, inoise,dt,ds);
              }
              InsertSliceLocal(sink3d,phi[vecindex],0,t-Ntfirst,Tdir);
            }
          }
        }
      }
    }
  }
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MDistil_DistilVectors_hpp_

View File

@ -0,0 +1,36 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MDistil/LapEvec.cc
Copyright (C) 2019
Author: Felix Erben <ferben@ed.ac.uk>
Author: Michael Marshall <Michael.Marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/Modules/MDistil/LapEvec.hpp>
// Bring the Grid, Hadrons and MDistil namespaces into scope for this translation unit
using namespace Grid;
using namespace Hadrons;
using namespace MDistil;
// Explicit instantiation of the module template for GIMPL
template class Grid::Hadrons::MDistil::TLapEvec<GIMPL>;

View File

@ -0,0 +1,335 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MDistil/LapEvec.hpp
Copyright (C) 2019
Author: Felix Erben <ferben@ed.ac.uk>
Author: Michael Marshall <Michael.Marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MDistil_LapEvec_hpp_
#define Hadrons_MDistil_LapEvec_hpp_
#include <Hadrons/Modules/MDistil/Distil.hpp>
BEGIN_HADRONS_NAMESPACE
BEGIN_MODULE_NAMESPACE(MDistil)
/******************************************************************************
Laplacian eigenvectors - parameters
Computes the eigenvectors of the 3D-Laplacian, built from stout-smeared
gauge links with the specified number of steps and smearing parameter rho.
The smearing is only applied to the spatial components of the gauge field,
i.e. rho_{4i} = rho_{i4} = rho_{44} = 0.
Chebyshev-preconditioning is needed for convergence of the nvec lowest
eigenvectors.
******************************************************************************/
// Stout smearing settings: number of smearing steps and smearing parameter rho
struct StoutParameters: Serializable {
  GRID_SERIALIZABLE_CLASS_MEMBERS(StoutParameters,
                                  int, steps,
                                  double, rho)
  StoutParameters() = default;
  // Populate this object from the "StoutSmearing" node of the given reader
  template <class ReaderClass>
  StoutParameters(Reader<ReaderClass>& reader)
  {
    read(reader,"StoutSmearing",*this);
  }
};
// Chebyshev polynomial settings: polynomial order and the two bounds alpha, beta
struct ChebyshevParameters: Serializable {
  GRID_SERIALIZABLE_CLASS_MEMBERS(ChebyshevParameters,
                                  int, PolyOrder,
                                  double, alpha,
                                  double, beta)
  ChebyshevParameters() = default;
  // Populate this object from the "Chebyshev" node of the given reader
  template <class ReaderClass>
  ChebyshevParameters(Reader<ReaderClass>& reader)
  {
    read(reader,"Chebyshev",*this);
  }
};
// Settings passed to the implicitly restarted Lanczos solver
// (Nvec, Nk, Np, MaxIt, resid) plus IRLLog, which switches its logging on or off
struct LanczosParameters: Serializable {
  GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
                                  int, Nvec,
                                  int, Nk,
                                  int, Np,
                                  int, MaxIt,
                                  double, resid,
                                  int, IRLLog)
  LanczosParameters() = default;
  // Populate this object from the "Lanczos" node of the given reader
  template <class ReaderClass>
  LanczosParameters(Reader<ReaderClass>& reader)
  {
    read(reader,"Lanczos",*this);
  }
};
// These are the actual parameters passed to the module during construction
// Parameters of the LapEvec module:
//   gauge    name of the input gauge field object
//   Stout    stout smearing settings
//   Cheby    Chebyshev preconditioning settings
//   Lanczos  eigensolver settings
//   FileName base name for writing the eigenpack; nothing is written when empty
struct LapEvecPar: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(LapEvecPar
,std::string, gauge
,StoutParameters, Stout
,ChebyshevParameters, Cheby
,LanczosParameters, Lanczos
,std::string, FileName)
};
/******************************************************************************
Laplacian eigenvectors - Module (class) definition
******************************************************************************/
// Module computing eigenvectors of the 3D Laplacian on every timeslice
template <typename GImpl>
class TLapEvec: public Module<LapEvecPar>
{
public:
  GAUGE_TYPE_ALIASES(GImpl,);
  // construction / destruction
  TLapEvec(const std::string name);
  virtual ~TLapEvec() {}
  // names of the objects this module depends on / produces
  virtual std::vector<std::string> getInput();
  virtual std::vector<std::string> getOutput();
  // object creation
  virtual void setup();
  // module body
  virtual void execute();
protected:
  // Lower dimensional grid built in setup(); released automatically by the unique_ptr
  std::unique_ptr<GridCartesian> gridLD;
};
MODULE_REGISTER_TMP(LapEvec, TLapEvec<GIMPL>, MDistil);
/******************************************************************************
TLapEvec implementation
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
// Forwards the module name to the Module base class; gridLD starts out empty
template <typename GImpl>
TLapEvec<GImpl>::TLapEvec(const std::string name)
: Module<LapEvecPar>(name)
{
}
// dependencies/products ///////////////////////////////////////////////////////
// The only dependency is the gauge field named in the parameters
template <typename GImpl>
std::vector<std::string> TLapEvec<GImpl>::getInput(void)
{
  std::vector<std::string> in;
  in.push_back(par().gauge);
  return in;
}
// The single output (the higher dimensional eigenpack) carries the module's name
template <typename GImpl>
std::vector<std::string> TLapEvec<GImpl>::getOutput(void)
{
  std::vector<std::string> out;
  out.push_back(getName());
  return out;
}
// setup ///////////////////////////////////////////////////////////////////////
// Builds the lower dimensional grid, then registers the temporaries used by
// execute() and the output eigenpack.
template <typename GImpl>
void TLapEvec<GImpl>::setup(void)
{
  GridCartesian * grid4d = env().getGrid();
  MakeLowerDimGrid(gridLD, grid4d);
  GridCartesian * grid3d = gridLD.get();
  // Number of timeslices held by this node: one eigenpack temporary per slice
  const int localNt = grid4d->LocalDimensions()[Tdir];

  // Temporaries on the full grid
  envTmpLat(GaugeField, "Umu_stout");
  envTmpLat(GaugeField, "Umu_smear");
  // Temporaries on the lower dimensional grid
  envTmp(LatticeGaugeField, "UmuNoTime",1,LatticeGaugeField(grid3d));
  envTmp(LatticeColourVector, "src",1,LatticeColourVector(grid3d));
  envTmp(std::vector<LapEvecs>, "eig",1,std::vector<LapEvecs>(localNt));

  // Output: Nvec eigenvectors on the full grid
  envCreate(LapEvecs, getName(), 1, par().Lanczos.Nvec, grid4d);
}
/*************************************************************************************
-Grad^2 (Peardon, 2009, pg 2, equation 3, https://arxiv.org/abs/0905.2160)
Field Type of field the operator will be applied to
GaugeField Gauge field the operator will smear using
*************************************************************************************/
// Linear operator applying -Grad^2 over the first nd directions of a field,
// using the link fields of the supplied gauge field.
//   Field       type of field the operator is applied to
//   GaugeField  type of the gauge field supplying the links
// Only operator() and HermOp() are implemented; the remaining LinearOperatorBase
// entry points raise a Definition error.
template<typename Field, typename GaugeField=LatticeGaugeField>
class Laplacian3D : public LinearOperatorBase<Field>, public LinearFunction<Field> {
  typedef typename GaugeField::vector_type vCoeff_t;
public:
  int nd; // number of spatial dimensions
  std::vector<Lattice<iColourMatrix<vCoeff_t> > > U; // link field for each of the nd directions

  // Construct this operator given a gauge field and the number of dimensions it should act on
  // Raises a Range error if dimSpatial < 1
  Laplacian3D( GaugeField& gf, int dimSpatial = Tdir ) : nd{dimSpatial}
  {
    if (dimSpatial<1)
    {
      HADRONS_ERROR(Range,"Must be at least one spatial dimension");
    }
    // Reserve up front so that lattice objects are not reallocated while U grows
    U.reserve(static_cast<std::size_t>(nd));
    for (int mu = 0 ; mu < nd ; mu++)
      U.push_back(PeekIndex<LorentzIndex>(gf,mu));
  }

  // Apply this operator to "in", return result in "out"
  //   out = 2*nd*in - sum_mu [ U_mu(x) in(x+mu) + U_mu^dag(x-mu) in(x-mu) ]
  // Raises a Range error if nd exceeds the dimension of the field's grid
  void operator()(const Field& in, Field& out) {
    if (nd > in.Grid()->Nd())
    {
      HADRONS_ERROR(Range,"nd too large");
    }
    conformable( in, out );
    out = ( ( Real ) ( 2 * nd ) ) * in;
    Field tmp_(in.Grid());
    for (int mu = 0 ; mu < nd ; mu++)
    {
      out -= U[mu] * Cshift( in, mu, 1);
      // Multiply by the adjoint link before shifting, so the link comes from x-mu
      tmp_ = adj( U[mu] ) * in;
      out -= Cshift(tmp_,mu,-1);
    }
  }

  // Not implemented for this operator
  void OpDiag (const Field &in, Field &out) { HADRONS_ERROR(Definition, "OpDiag() undefined"); }
  void OpDir (const Field &in, Field &out,int dir,int disp) { HADRONS_ERROR(Definition, "OpDir() undefined"); }
  void Op (const Field &in, Field &out) { HADRONS_ERROR(Definition, "Op() undefined"); }
  void AdjOp (const Field &in, Field &out) { HADRONS_ERROR(Definition, "AdjOp() undefined"); }
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) { HADRONS_ERROR(Definition, "HermOpAndNorm() undefined"); }
  // The hermitian operator is the operator itself
  void HermOp(const Field &in, Field &out) { operator()(in,out); }
};
// Linear function applying an operator function (poly_) to a linear operator (Linop_).
// Both are held by reference and must outlive this object.
template<typename Field>
class Laplacian3DHerm : public LinearFunction<Field> {
public:
  OperatorFunction<Field> & poly_;
  LinearOperatorBase<Field> &Linop_;

  Laplacian3DHerm(OperatorFunction<Field> & poly,LinearOperatorBase<Field>& linop)
  : poly_(poly), Linop_(linop)
  {
  }

  // out = poly_(Linop_) applied to in
  void operator()(const Field& in, Field& out)
  {
    poly_(Linop_,in,out);
  }
};
/******************************************************************************
Calculate low-mode eigenvalues of the Laplacian
******************************************************************************/
// execution ///////////////////////////////////////////////////////////////////
// Computes the low-mode eigenvectors of the 3D Laplacian on every local timeslice:
//   1. optionally stout-smears the gauge field (Stout.steps iterations)
//   2. for each local timeslice, runs the implicitly restarted Lanczos on the
//      Chebyshev-preconditioned Laplacian, rotates the eigenvectors into the phase
//      convention and inserts them into the output eigenpack
//   3. raises a Program error on all nodes if any node failed to converge
//   4. writes the eigenpack to "<FileName>.<trajectory>" when FileName is not empty
template <typename GImpl>
void TLapEvec<GImpl>::execute(void)
{
const ChebyshevParameters &ChebPar{par().Cheby};
const LanczosParameters &LPar{par().Lanczos};
// Disable IRL logging if requested
LOG(Message) << "IRLLog=" << LPar.IRLLog << std::endl;
// Remember the current logging state so it can be restored after the timeslice loop
const int PreviousIRLLogState{GridLogIRL.isActive()};
GridLogIRL.Active( LPar.IRLLog == 0 ? 0 : 1 );
// Stout smearing
envGetTmp(GaugeField, Umu_smear);
Umu_smear = envGet(GaugeField, par().gauge); // The smeared field starts off as the Gauge field
// NOTE(review): PeriodicGimplR is hard-coded here and below rather than using GImpl - confirm this is intended
LOG(Message) << "Initial plaquette: " << WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu_smear) << std::endl;
const StoutParameters &Stout{par().Stout};
if( Stout.steps )
{
envGetTmp(GaugeField, Umu_stout);
Smear_Stout<PeriodicGimplR> LS(Stout.rho, Tdir); // spatial smearing only
// Each step smears into Umu_stout, then copies the result back as the next input
for (int i = 0; i < Stout.steps; i++) {
LS.smear(Umu_stout, Umu_smear);
Umu_smear = Umu_stout;
}
LOG(Message) << "Smeared plaquette: " << WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu_smear) << std::endl;
}
////////////////////////////////////////////////////////////////////////
// Invert nabla operator separately on each time-slice
////////////////////////////////////////////////////////////////////////
auto & eig4d = envGet(LapEvecs, getName() );
envGetTmp(std::vector<LapEvecs>, eig); // Eigenpack for each timeslice
envGetTmp(LatticeGaugeField, UmuNoTime); // Gauge field without time dimension
envGetTmp(LatticeColourVector, src);
GridCartesian * gridHD = env().getGrid();
const int Ntlocal{gridHD->LocalDimensions()[Tdir]};
const int Ntfirst{gridHD->LocalStarts()[Tdir]};
// Set to 1 on this node if any local timeslice fails to converge; summed over nodes below
uint32_t ConvergenceErrors{0};
for (int t = 0; t < Ntlocal; t++ )
{
LOG(Message) << "------------------------------------------------------------" << std::endl;
LOG(Message) << " Compute eigenpack, local timeslice = " << t << " / " << Ntlocal << std::endl;
LOG(Message) << "------------------------------------------------------------" << std::endl;
// Nk+Np vectors are handed to the Lanczos; trimmed to Nvec afterwards
eig[t].resize(LPar.Nk+LPar.Np,gridLD.get());
// Construct smearing operator
ExtractSliceLocal(UmuNoTime,Umu_smear,0,t,Tdir); // switch to 3d/4d objects
Laplacian3D<LatticeColourVector> Nabla(UmuNoTime);
LOG(Message) << "Chebyshev preconditioning to order " << ChebPar.PolyOrder
<< " with parameters (alpha,beta) = (" << ChebPar.alpha << "," << ChebPar.beta << ")" << std::endl;
Chebyshev<LatticeColourVector> Cheb(ChebPar.alpha,ChebPar.beta,ChebPar.PolyOrder);
// Construct source vector according to Test_dwf_compressed_lanczos.cc
src = 11.0; // NB: This is a dummy parameter and just needs to be non-zero
// Normalise the source
RealD nn = norm2(src);
nn = Grid::sqrt(nn);
src = src * (1.0/nn);
Laplacian3DHerm<LatticeColourVector> NablaCheby(Cheb,Nabla);
ImplicitlyRestartedLanczos<LatticeColourVector>
IRL(NablaCheby,Nabla,LPar.Nvec,LPar.Nk,LPar.Nk+LPar.Np,LPar.resid,LPar.MaxIt);
int Nconv = 0;
IRL.calc(eig[t].eval,eig[t].evec,src,Nconv);
if (Nconv < LPar.Nvec)
{
// NB: Can't assert here since we are processing local slices - i.e. not all nodes would assert
ConvergenceErrors = 1;
LOG(Error) << "MDistil::LapEvec : Not enough eigenvectors converged. If this occurs in practice, we should modify the eigensolver to iterate once more to ensure the second convergence test does not take us below the requested number of eigenvectors" << std::endl;
}
if( Nconv != LPar.Nvec )
eig[t].resize(LPar.Nvec, gridLD.get());
RotateEigen( eig[t].evec ); // Rotate the eigenvectors into our phase convention
for (int i=0;i<LPar.Nvec;i++){
InsertSliceLocal(eig[t].evec[i],eig4d.evec[i],0,t,Tdir);
// Eigenvalues are only copied from global timeslice 0, i.e. on the node(s) holding it
if(t==0 && Ntfirst==0)
eig4d.eval[i] = eig[t].eval[i]; // TODO: Discuss: is this needed? Is there a better way?
}
}
GridLogIRL.Active( PreviousIRLLogState );
// Combine the per-node flags so that every node takes the same error path
gridHD->GlobalSum(ConvergenceErrors);
if(ConvergenceErrors!=0)
{
HADRONS_ERROR(Program,"The eingensolver failed to find enough eigenvectors on at least one node");
}
// Now write out the 4d eigenvectors
std::string sEigenPackName(par().FileName);
if( !sEigenPackName.empty() )
{
// Record this module's parameters and those of the module that produced the gauge field
eig4d.record.solverXml = parString();
ModuleBase * b{vm().getModule(par().gauge)};
std::string sOperatorXml{ "<module><id><type>" };
sOperatorXml.append( b->getRegisteredName() );
sOperatorXml.append( "</type></id><options>" );
sOperatorXml.append( b->parString() );
sOperatorXml.append( "</options></module>" );
eig4d.record.operatorXml = sOperatorXml;
// File name is "<FileName>.<trajectory>"
sEigenPackName.append(".");
sEigenPackName.append(std::to_string(vm().getTrajectory()));
eig4d.write(sEigenPackName,false);
}
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MDistil_LapEvec_hpp_

View File

@ -0,0 +1,36 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MDistil/Noises.cc
Copyright (C) 2019
Author: Felix Erben <ferben@ed.ac.uk>
Author: Michael Marshall <Michael.Marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/Modules/MDistil/Noises.hpp>
// Bring the Grid, Hadrons and MDistil namespaces into scope for this translation unit
using namespace Grid;
using namespace Hadrons;
using namespace MDistil;
// Explicit instantiation of the module template for FIMPL
template class Grid::Hadrons::MDistil::TNoises<FIMPL>;

View File

@ -0,0 +1,146 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MDistil/Noises.hpp
Copyright (C) 2019
Author: Felix Erben <ferben@ed.ac.uk>
Author: Michael Marshall <Michael.Marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MDistil_Noises_hpp_
#define Hadrons_MDistil_Noises_hpp_
#include <Hadrons/Modules/MDistil/Distil.hpp>
BEGIN_HADRONS_NAMESPACE
BEGIN_MODULE_NAMESPACE(MDistil)
/******************************************************************************
* Noises *
******************************************************************************/
// Parameters of the Noises module:
//   DistilParams  name of the DistilParameters object to use (module input)
//   NoiseFileName base name of the file the noises are written to;
//                 "." and the trajectory number are appended
class NoisesPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(NoisesPar,
std::string, DistilParams,
std::string, NoiseFileName)
};
// Module creating the noise tensor used for distillation
template <typename FImpl>
class TNoises: public Module<NoisesPar>
{
public:
  // construction / destruction
  TNoises(const std::string name);
  virtual ~TNoises() {}
  // names of the objects this module depends on / produces
  virtual std::vector<std::string> getInput();
  virtual std::vector<std::string> getOutput();
  // object creation
  virtual void setup();
  // module body
  virtual void execute();
};
MODULE_REGISTER_TMP(Noises, TNoises<FIMPL>, MDistil);
/******************************************************************************
* TNoises implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
// Forwards the module name to the Module base class; no other state to initialise
template <typename FImpl>
TNoises<FImpl>::TNoises(const std::string name)
: Module<NoisesPar>(name)
{
}
// dependencies/products ///////////////////////////////////////////////////////
// The only dependency is the distillation parameters object
template <typename FImpl>
std::vector<std::string> TNoises<FImpl>::getInput(void)
{
  std::vector<std::string> in;
  in.push_back(par().DistilParams);
  return in;
}
// The single output (the noise tensor) carries the module's own name
template <typename FImpl>
std::vector<std::string> TNoises<FImpl>::getOutput(void)
{
  std::vector<std::string> out;
  out.push_back(getName());
  return out;
}
// setup ///////////////////////////////////////////////////////////////////////
// Creates the output noise tensor with dimensions (nnoise, Nt, nvec, Ns),
// where Nt is the lattice extent in the time direction.
template <typename FImpl>
void TNoises<FImpl>::setup(void)
{
  const DistilParameters &dp{envGet(DistilParameters, par().DistilParams)};
  const int Nt{env().getDim(Tdir)};
  // Report the dimensions through the logger, with labels, instead of raw stdout
  LOG(Message) << "Noise tensor dimensions: nnoise=" << dp.nnoise << ", Nt=" << Nt
               << ", nvec=" << dp.nvec << ", Ns=" << Ns << std::endl;
  envCreate(NoiseTensor, getName(), 1, dp.nnoise, Nt, dp.nvec, Ns);
}
// execution ///////////////////////////////////////////////////////////////////
// Fills the noise tensor and, on the boss node, writes it to
// "<NoiseFileName>.<trajectory>". Nothing is written if NoiseFileName is empty.
// Every element is 1 for exact distillation; otherwise each element is drawn
// as +1 or -1 from the serial random number generator.
template <typename FImpl>
void TNoises<FImpl>::execute(void)
{
  const DistilParameters &dp{envGet(DistilParameters, par().DistilParams)};
  const int Nt{env().getDim(Tdir)};
  const bool full_tdil{ dp.TI == Nt };
  // NOTE(review): SI is not part of this test, although the DistilParameters
  // notes describe exact distillation as (TI,LI,SI) = (Nt,nvec,Ns) - confirm
  const bool exact_distillation{ full_tdil && dp.LI == dp.nvec };
  // We use our own seeds so we can specify different noises per quark
  auto &noise = envGet(NoiseTensor, getName());
  for (int inoise = 0; inoise < dp.nnoise; inoise++)
  {
    for (int t = 0; t < Nt; t++)
    {
      for (int ivec = 0; ivec < dp.nvec; ivec++)
      {
        for (int is = 0; is < Ns; is++)
        {
          if (exact_distillation)
          {
            noise.tensor(inoise, t, ivec, is) = 1.;
          }
          else
          {
            Real rn;
            random(rngSerial(),rn);
            // We could use a greater number of complex roots of unity
            // ... but this seems to work well
            noise.tensor(inoise, t, ivec, is) = (rn > 0.5) ? -1 : 1;
          }
        }
      }
    }
  }
  // Skip the write when no file name was given, rather than creating ".<trajectory>"
  const std::string &fileStem{par().NoiseFileName};
  if (!fileStem.empty() && env().getGrid()->IsBoss())
  {
    std::string sName {fileStem};
    sName.append(".");
    sName.append(std::to_string(vm().getTrajectory()));
    noise.write(sName.c_str());
  }
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif

View File

@ -0,0 +1,36 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MDistil/PerambFromSolve.cc
Copyright (C) 2019
Author: Felix Erben <ferben@ed.ac.uk>
Author: Michael Marshall <Michael.Marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/Modules/MDistil/PerambFromSolve.hpp>
// Bring the Grid, Hadrons and MDistil namespaces into scope for this translation unit.
using namespace Grid;
using namespace Hadrons;
using namespace MDistil;
// Explicit instantiation of the module template for FIMPL.
template class Grid::Hadrons::MDistil::TPerambFromSolve<FIMPL>;

View File

@ -0,0 +1,183 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MDistil/PerambFromSolve.hpp
Copyright (C) 2019
Author: Felix Erben <ferben@ed.ac.uk>
Author: Michael Marshall <Michael.Marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MDistil_PerambFromSolve_hpp_
#define Hadrons_MDistil_PerambFromSolve_hpp_
#include <Hadrons/Modules/MDistil/Distil.hpp>
BEGIN_HADRONS_NAMESPACE
BEGIN_MODULE_NAMESPACE(MDistil)
/******************************************************************************
* PerambFromSolve
This module computes a perambulator from an already completed solve.
Optionally, the number of eigenvectors used in the perambulator and the
parameter LI can be chosen to be lower than the ones in the solve, allowing
for a study of the signal with different values of nvec.
LI_reduced : value of LI actually used in the computation
nvec_reduced: value of nvec actually used in the computation
LI : value of LI used to compute the 'solve'
nvec : value of nvec used to compute the 'solve'
******************************************************************************/
// Serialisable parameter set of the PerambFromSolve module.
//   lapevec        : name of the eigenvector pack object (module input)
//   PerambFileName : file name prefix; ".<trajectory>" is appended when the perambulator is written
//   solve          : name of the object holding the existing solves (module input)
//   nvec_reduced   : number of eigenvectors used when building the perambulator
//   LI_reduced     : value of LI used when building the perambulator
//   DistilParams   : name of the DistilParameters object (module input)
class PerambFromSolvePar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(PerambFromSolvePar,
std::string, lapevec,
std::string, PerambFileName,
std::string, solve,
int, nvec_reduced,
int, LI_reduced,
std::string, DistilParams);
};
// Module that rebuilds a perambulator from solves already held in the environment.
template <typename FImpl>
class TPerambFromSolve : public Module<PerambFromSolvePar>
{
public:
    FERM_TYPE_ALIASES(FImpl,);
    // construction and destruction
    TPerambFromSolve(const std::string name);
    virtual ~TPerambFromSolve() {}
    // names of the objects consumed / produced by the module
    virtual std::vector<std::string> getInput();
    virtual std::vector<std::string> getOutput();
    // allocation of the outputs and temporaries
    virtual void setup();
    // computation
    virtual void execute();

protected:
    // Lower-dimensional grid created in setup(); the unique_ptr releases it automatically
    std::unique_ptr<GridCartesian> grid3d;
};
// Registers TPerambFromSolve, instantiated for FIMPL, under the name "PerambFromSolve" in MDistil.
// NOTE(review): MODULE_REGISTER_TMP is defined outside this chunk; presumably it hooks the
// type into the Hadrons module factory - confirm against its definition.
MODULE_REGISTER_TMP(PerambFromSolve, TPerambFromSolve<FIMPL>, MDistil);
/******************************************************************************
* TPerambFromSolve implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
// Builds the module; the instance name is handed on to the Module<PerambFromSolvePar> base.
template <typename FImpl>
TPerambFromSolve<FImpl>::TPerambFromSolve(const std::string name)
: Module<PerambFromSolvePar>(name)
{
}
// dependencies/products ///////////////////////////////////////////////////////
// Names of the objects this module reads: the solves, the eigenvector pack and
// the distillation parameters, in that order.
template <typename FImpl>
std::vector<std::string> TPerambFromSolve<FImpl>::getInput(void)
{
    std::vector<std::string> dependencies;
    dependencies.push_back(par().solve);
    dependencies.push_back(par().lapevec);
    dependencies.push_back(par().DistilParams);
    return dependencies;
}
// Names of the objects this module declares as output: one object named after the module.
template <typename FImpl>
std::vector<std::string> TPerambFromSolve<FImpl>::getOutput(void)
{
    std::vector<std::string> products;
    products.push_back(getName());
    return products;
}
// setup ///////////////////////////////////////////////////////////////////////
// Creates the lower-dimensional grid, the output perambulator tensor (sized with the
// reduced nvec / LI), a noise tensor named "<module>_noise", and the temporaries
// fetched again in execute().
template <typename FImpl>
void TPerambFromSolve<FImpl>::setup(void)
{
    const DistilParameters &distil = envGet(MDistil::DistilParameters, par().DistilParams);
    const int timeExtent = env().getDim(Tdir);
    // With full time dilution (TI == Nt) there is a single time inversion
    const int numTimeInv = (distil.TI == timeExtent) ? 1 : distil.TI;
    MakeLowerDimGrid(grid3d, env().getGrid());
    const int vecCount = par().nvec_reduced;
    const int lapCount = par().LI_reduced;
    // Output objects
    envCreate(PerambTensor, getName(), 1,
              timeExtent, vecCount, lapCount, distil.nnoise, numTimeInv, distil.SI);
    envCreate(NoiseTensor, getName() + "_noise", 1,
              distil.nnoise, timeExtent, distil.nvec, Ns);
    // Temporaries: two fields on the lower-dimensional grid, one on the full lattice
    envTmp(LatticeColourVector, "result3d_nospin", 1, LatticeColourVector(grid3d.get()));
    envTmp(LatticeColourVector, "evec3d", 1, LatticeColourVector(grid3d.get()));
    envTmpLat(LatticeColourVector, "result4d_nospin");
}
// execution ///////////////////////////////////////////////////////////////////
// Builds the perambulator from existing solves: for every noise / dilution index the
// stored solution is split by spin, sliced per local timeslice, and contracted with
// the first nvec_reduced eigenvectors on the same timeslice. The boss rank then writes
// the tensor to "<PerambFileName>.<trajectory>".
//
// The per-element dump of the tensor is logged at Debug level: at Message level it
// produced one log line per tensor element per spin at default verbosity.
//
// NOTE(review): only timeslices held locally are filled here and no exchange between
// ranks is visible before the boss writes - confirm this is intended when the time
// direction is split across ranks.
template <typename FImpl>
void TPerambFromSolve<FImpl>::execute(void)
{
    GridCartesian * grid4d = env().getGrid();
    const int Ntlocal{grid4d->LocalDimensions()[3]};
    const int Ntfirst{grid4d->LocalStarts()[3]};
    const DistilParameters &dp{envGet(DistilParameters, par().DistilParams)};
    const int Nt{env().getDim(Tdir)};
    const bool full_tdil{ dp.TI == Nt };
    // With full time dilution there is a single time inversion
    const int Nt_inv{ full_tdil ? 1 : dp.TI };
    const int nvec_reduced{par().nvec_reduced};
    const int LI_reduced{ par().LI_reduced};
    auto &perambulator = envGet(PerambTensor, getName());
    auto &solve = envGet(std::vector<FermionField>, par().solve);
    auto &epack = envGet(Grid::Hadrons::EigenPack<LatticeColourVector>, par().lapevec);
    envGetTmp(LatticeColourVector, result4d_nospin);
    envGetTmp(LatticeColourVector, result3d_nospin);
    envGetTmp(LatticeColourVector, evec3d);
    for (int inoise = 0; inoise < dp.nnoise; inoise++)
    {
        for (int dk = 0; dk < LI_reduced; dk++)
        {
            for (int dt = 0; dt < Nt_inv; dt++)
            {
                for (int ds = 0; ds < dp.SI; ds++)
                {
                    // The solves are indexed with the stride of the original LI (dp.LI),
                    // not the reduced one, even though dk only runs up to LI_reduced
                    const int solveIdx{inoise+dp.nnoise*(dk+dp.LI*(dt+Nt_inv*ds))};
                    for (int is = 0; is < Ns; is++)
                    {
                        result4d_nospin = peekSpin(solve[solveIdx],is);
                        for (int t = Ntfirst; t < Ntfirst + Ntlocal; t++)
                        {
                            const int tLocal{t-Ntfirst};
                            ExtractSliceLocal(result3d_nospin,result4d_nospin,0,tLocal,Tdir);
                            for (int ivec = 0; ivec < nvec_reduced; ivec++)
                            {
                                ExtractSliceLocal(evec3d,epack.evec[ivec],0,tLocal,Tdir);
                                pokeSpin(perambulator.tensor(t, ivec, dk, inoise,dt,ds),static_cast<Complex>(innerProduct(evec3d, result3d_nospin)),is);
                                LOG(Debug) << "perambulator(t, ivec, dk, inoise,dt,ds)(is) = (" << t << "," << ivec << "," << dk << "," << inoise << "," << dt << "," << ds << ")(" << is << ") = " << perambulator.tensor(t, ivec, dk, inoise,dt,ds)()(is)() << std::endl;
                            }
                        }
                    }
                }
            }
        }
    }
    // Only the boss rank writes the perambulator
    if(grid4d->IsBoss())
    {
        std::string sPerambName{par().PerambFileName};
        sPerambName.append( "." );
        sPerambName.append( std::to_string(vm().getTrajectory()));
        perambulator.write(sPerambName.c_str());
    }
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MDistil_PerambFromSolve_hpp_

View File

@ -0,0 +1,57 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MDistil/Perambulator.cc
Copyright (C) 2019
Author: Felix Erben <ferben@ed.ac.uk>
Author: Michael Marshall <Michael.Marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/Modules/MDistil/Perambulator.hpp>
// Bring the Grid, Hadrons and MDistil namespaces into scope for this translation unit.
using namespace Grid;
using namespace Hadrons;
using namespace MDistil;
// Explicit instantiation of the module template for FIMPL.
template class Grid::Hadrons::MDistil::TPerambulator<FIMPL>;
BEGIN_HADRONS_NAMESPACE
// Global constants for distillation
// File extension for named tensors: ".h5" when HAVE_HDF5 is defined, ".dat" otherwise.
#ifdef HAVE_HDF5
extern const std::string NamedTensorFileExtension{".h5"};
#else
extern const std::string NamedTensorFileExtension{".dat"};
#endif
BEGIN_MODULE_NAMESPACE(MDistil)
// Definitions of the static name and default index names of the noise tensor (4 indices)
const std::string NoiseTensor::Name__{"Noises"};
const std::array<std::string, 4> NoiseTensor::DefaultIndexNames__{"nNoise", "nT", "nVec", "nS"};
// Definitions of the static name and default index names of the perambulator tensor (6 indices)
const std::string PerambTensor::Name__{"Perambulator"};
const std::array<std::string, 6> PerambTensor::DefaultIndexNames__{"nT", "nVec", "LI", "nNoise", "nT_inv", "SI"};
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE

View File

@ -0,0 +1,263 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MDistil/Perambulator.hpp
Copyright (C) 2019
Author: Felix Erben <ferben@ed.ac.uk>
Author: Michael Marshall <Michael.Marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MDistil_Perambulator_hpp_
#define Hadrons_MDistil_Perambulator_hpp_
#include <Hadrons/Modules/MDistil/Distil.hpp>
BEGIN_HADRONS_NAMESPACE
BEGIN_MODULE_NAMESPACE(MDistil)
/******************************************************************************
* Perambulator *
******************************************************************************/
// Serialisable parameter set of the Perambulator module.
//   lapevec               : name of the eigenvector pack object (module input)
//   solver                : name of the solver object (module input)
//   noise                 : name of the noise tensor object (module input)
//   PerambFileName        : file name prefix; ".<trajectory>" is appended when the perambulator is written
//   UnsmearedSinkFileName : if non-empty, the unsmeared sinks are stored and written under this name
//   DistilParams          : name of the DistilParameters object (module input)
class PerambulatorPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(PerambulatorPar,
std::string, lapevec,
std::string, solver,
std::string, noise,
std::string, PerambFileName,
std::string, UnsmearedSinkFileName,
std::string, DistilParams);
};
// Module computing a perambulator (and, optionally, the unsmeared sinks) by solving
// on sources built from noise and eigenvectors.
template <typename FImpl>
class TPerambulator : public Module<PerambulatorPar>
{
public:
    FERM_TYPE_ALIASES(FImpl,);
    SOLVER_TYPE_ALIASES(FImpl,);
    // construction and destruction
    TPerambulator(const std::string name);
    virtual ~TPerambulator() {}
    // names of the objects consumed / produced by the module
    virtual std::vector<std::string> getInput();
    virtual std::vector<std::string> getOutput();
    // allocation of the outputs and temporaries
    virtual void setup();
    // computation
    virtual void execute();

protected:
    // Lower-dimensional grid created in setup(); the unique_ptr releases it automatically
    std::unique_ptr<GridCartesian> grid3d;
    // Ls of the solver object, read in setup() and used to pick the 4d or 5d solve path
    unsigned int Ls_;
};
// Registers TPerambulator, instantiated for FIMPL, under the name "Perambulator" in MDistil.
// NOTE(review): MODULE_REGISTER_TMP is defined outside this chunk; presumably it hooks the
// type into the Hadrons module factory - confirm against its definition.
MODULE_REGISTER_TMP(Perambulator, TPerambulator<FIMPL>, MDistil);
/******************************************************************************
* TPerambulator implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
// Builds the module; the instance name is handed on to the Module<PerambulatorPar> base.
template <typename FImpl>
TPerambulator<FImpl>::TPerambulator(const std::string name)
: Module<PerambulatorPar>(name)
{
}
// dependencies/products ///////////////////////////////////////////////////////
// Names of the objects this module reads: eigenvector pack, solver, noise tensor
// and distillation parameters, in that order.
template <typename FImpl>
std::vector<std::string> TPerambulator<FImpl>::getInput(void)
{
    std::vector<std::string> dependencies;
    dependencies.push_back(par().lapevec);
    dependencies.push_back(par().solver);
    dependencies.push_back(par().noise);
    dependencies.push_back(par().DistilParams);
    return dependencies;
}
// Names of the objects this module produces: the perambulator (named after the
// module) and the unsmeared sinks ("<module>_unsmeared_sink").
template <typename FImpl>
std::vector<std::string> TPerambulator<FImpl>::getOutput(void)
{
    std::vector<std::string> products;
    products.push_back(getName());
    products.push_back(getName() + "_unsmeared_sink");
    return products;
}
// setup ///////////////////////////////////////////////////////////////////////
// Creates the lower-dimensional grid, the two output objects and every temporary
// fetched again in execute(); also records the solver's Ls.
template <typename FImpl>
void TPerambulator<FImpl>::setup(void)
{
    MakeLowerDimGrid(grid3d, env().getGrid());
    const DistilParameters &distil{envGet(DistilParameters, par().DistilParams)};
    const int timeExtent = env().getDim(Tdir);
    // With full time dilution (TI == Nt) there is a single time inversion
    const int numTimeInv = (distil.TI == timeExtent) ? 1 : distil.TI;
    // Outputs: the perambulator tensor and the vector of unsmeared sinks.
    // NOTE(review): the sink vector is sized with Ns while execute() indexes it with
    // ds < SI, so it looks over-allocated whenever SI < Ns - confirm this is intended.
    envCreate(PerambTensor, getName(), 1,
              timeExtent, distil.nvec, distil.LI, distil.nnoise, numTimeInv, distil.SI);
    envCreate(std::vector<FermionField>, getName() + "_unsmeared_sink", 1,
              distil.nnoise*distil.LI*Ns*numTimeInv, envGetGrid(FermionField));
    // Temporaries used to build the source
    envTmpLat(LatticeSpinColourVector, "dist_source");
    envTmpLat(LatticeSpinColourVector, "source4d");
    envTmp(LatticeSpinColourVector, "source3d", 1, LatticeSpinColourVector(grid3d.get()));
    envTmp(LatticeColourVector, "source3d_nospin", 1, LatticeColourVector(grid3d.get()));
    // Temporaries used to hold and slice the solution
    envTmpLat(LatticeSpinColourVector, "result4d");
    envTmpLat(LatticeColourVector, "result4d_nospin");
    envTmp(LatticeColourVector, "result3d_nospin", 1, LatticeColourVector(grid3d.get()));
    envTmp(LatticeColourVector, "evec3d", 1, LatticeColourVector(grid3d.get()));
    // Temporaries for the solve itself; the 5d ones carry the solver's Ls
    Ls_ = env().getObjectLs(par().solver);
    envTmpLat(FermionField, "v4dtmp");
    envTmpLat(FermionField, "v5dtmp", Ls_);
    envTmpLat(FermionField, "v5dtmp_sol", Ls_);
}
// execution ///////////////////////////////////////////////////////////////////
// Computes the perambulator.
// For every (noise, dk, dt, ds) combination: builds a diluted source from the noise
// tensor and the eigenvectors, runs the solver on it, optionally stores the solution
// as an unsmeared sink, and contracts each spin component of the solution with every
// eigenvector on each locally-held timeslice to fill the perambulator tensor.
// Afterwards the per-rank timeslice data are exchanged between ranks along the time
// direction, the boss rank writes the perambulator, and the unsmeared sinks are
// written if a file name was given.
template <typename FImpl>
void TPerambulator<FImpl>::execute(void)
{
const DistilParameters &dp{ envGet(DistilParameters, par().DistilParams) };
const int Nt{env().getDim(Tdir)};
// Full time dilution (TI == Nt) means a single time inversion
const bool full_tdil{ dp.TI == Nt };
const int Nt_inv{ full_tdil ? 1 : dp.TI };
auto &solver=envGet(Solver, par().solver);
auto &mat = solver.getFMat();
// Fetch the temporaries and objects created in setup()
envGetTmp(FermionField, v4dtmp);
envGetTmp(FermionField, v5dtmp);
envGetTmp(FermionField, v5dtmp_sol);
auto &noise = envGet(NoiseTensor, par().noise);
auto &perambulator = envGet(PerambTensor, getName());
auto &epack = envGet(LapEvecs, par().lapevec);
auto &unsmeared_sink = envGet(std::vector<FermionField>, getName() + "_unsmeared_sink");
envGetTmp(LatticeSpinColourVector, dist_source);
envGetTmp(LatticeSpinColourVector, source4d);
envGetTmp(LatticeSpinColourVector, source3d);
envGetTmp(LatticeColourVector, source3d_nospin);
envGetTmp(LatticeSpinColourVector, result4d);
envGetTmp(LatticeColourVector, result4d_nospin);
envGetTmp(LatticeColourVector, result3d_nospin);
envGetTmp(LatticeColourVector, evec3d);
GridCartesian * const grid4d{ env().getGrid() }; // Owned by environment (so I won't delete it)
// Extent and first global index of the timeslices held by this rank (index 3 of the local dimensions)
const int Ntlocal{grid4d->LocalDimensions()[3]};
const int Ntfirst{grid4d->LocalStarts()[3]};
const std::string UnsmearedSinkFileName{ par().UnsmearedSinkFileName };
for (int inoise = 0; inoise < dp.nnoise; inoise++)
{
for (int dk = 0; dk < dp.LI; dk++)
{
for (int dt = 0; dt < Nt_inv; dt++)
{
for (int ds = 0; ds < dp.SI; ds++)
{
LOG(Message) << "LapH source vector from noise " << inoise << " and dilution component (d_k,d_t,d_alpha) : (" << dk << ","<< dt << "," << ds << ")" << std::endl;
// Build the source: sum over the timeslices, eigenvectors and spins belonging to this dilution component
dist_source = 0;
evec3d = 0;
for (int it = dt; it < Nt; it += dp.TI)
{
// With full time dilution the source sits on timeslice dp.tsrc, otherwise on it
const int t_inv{full_tdil ? dp.tsrc : it};
// Only the rank(s) holding timeslice t_inv locally contribute to the source
if( t_inv >= Ntfirst && t_inv < Ntfirst + Ntlocal )
{
for (int ik = dk; ik < dp.nvec; ik += dp.LI)
{
for (int is = ds; is < Ns; is += dp.SI)
{
// Eigenvector slice times noise, placed in spin component is, inserted at the local timeslice
ExtractSliceLocal(evec3d,epack.evec[ik],0,t_inv-Ntfirst,Tdir);
source3d_nospin = evec3d * noise.tensor(inoise, t_inv, ik, is);
source3d=0;
pokeSpin(source3d,source3d_nospin,is);
source4d=0;
InsertSliceLocal(source3d,source4d,0,t_inv-Ntfirst,Tdir);
dist_source += source4d;
}
}
}
}
// Solve: directly when Ls_ == 1, otherwise import the source through mat, solve, and export the solution back
result4d=0;
v4dtmp = dist_source;
if (Ls_ == 1)
solver(result4d, v4dtmp);
else
{
mat.ImportPhysicalFermionSource(v4dtmp, v5dtmp);
solver(v5dtmp_sol, v5dtmp);
mat.ExportPhysicalFermionSolution(v5dtmp_sol, v4dtmp);
result4d = v4dtmp;
}
// The solution is kept as an unsmeared sink only when a file name was given
if (!UnsmearedSinkFileName.empty())
unsmeared_sink[inoise+dp.nnoise*(dk+dp.LI*(dt+Nt_inv*ds))] = result4d;
// Contract each spin component of the solution with every eigenvector on each local timeslice
for (int is = 0; is < Ns; is++)
{
result4d_nospin = peekSpin(result4d,is);
for (int t = Ntfirst; t < Ntfirst + Ntlocal; t++)
{
ExtractSliceLocal(result3d_nospin,result4d_nospin,0,t-Ntfirst,Tdir);
for (int ivec = 0; ivec < dp.nvec; ivec++)
{
ExtractSliceLocal(evec3d,epack.evec[ivec],0,t-Ntfirst,Tdir);
pokeSpin(perambulator.tensor(t, ivec, dk, inoise,dt,ds),static_cast<Complex>(innerProduct(evec3d, result3d_nospin)),is);
}
}
}
}
}
}
}
// Now share my timeslice data with other members of the grid
// NumSlices is the ratio of the processor counts of the two grids along Tdir
const int NumSlices{grid4d->_processors[Tdir] / grid3d->_processors[Tdir]};
if (NumSlices > 1)
{
LOG(Debug) << "Sharing perambulator data with other nodes" << std::endl;
const int MySlice {grid4d->_processor_coor[Tdir]};
// NOTE(review): assumes the tensor storage splits into NumSlices equal contiguous chunks,
// one per processor coordinate along Tdir - TODO confirm against the tensor's storage order
const int SliceCount {static_cast<int>(perambulator.tensor.size()/NumSlices)};
PerambTensor::Scalar * const MyData {perambulator.tensor.data()+MySlice*SliceCount};
// Partner coordinates: same as mine in every direction below Tdir; Tdir is set per partner
Coordinate coor(Nd);
for (int i = 0 ; i < Tdir ; i++) coor[i] = grid4d->_processor_coor[i];
std::vector<CommsRequest_t> reqs(0);
for (int i = 1; i < NumSlices ; i++)
{
// Step i: send my chunk to slice (MySlice+i) and receive the chunk of slice (MySlice-i), both modulo NumSlices
coor[Tdir] = (MySlice+i)%NumSlices;
const int SendRank { grid4d->RankFromProcessorCoor(coor) };
const int RecvSlice { ( MySlice - i + NumSlices ) % NumSlices };
coor[Tdir] = RecvSlice;
const auto RecvRank = grid4d->RankFromProcessorCoor(coor);
grid4d->SendToRecvFromBegin(reqs,MyData,SendRank, perambulator.tensor.data()
+ RecvSlice*SliceCount,RecvRank,SliceCount*sizeof(PerambTensor::Scalar));
}
// Complete all the transfers started above
grid4d->SendToRecvFromComplete(reqs);
}
// Save the perambulator to disk from the boss node
if (grid4d->IsBoss())
{
// File name is "<PerambFileName>.<trajectory>"
std::string sPerambName {par().PerambFileName};
sPerambName.append(".");
sPerambName.append(std::to_string(vm().getTrajectory()));
perambulator.write(sPerambName.c_str());
}
//Save the unsmeared sinks if filename specified
// Unlike the perambulator write, this call is not guarded by IsBoss()
if (!UnsmearedSinkFileName.empty())
{
LOG(Message) << "Writing unsmeared sink to " << UnsmearedSinkFileName << std::endl;
A2AVectorsIo::write(UnsmearedSinkFileName, unsmeared_sink, false, vm().getTrajectory());
}
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MDistil_Perambulator_hpp_

Some files were not shown because too many files have changed in this diff Show More