1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-14 05:07:05 +01:00

Compare commits

...

454 Commits

Author SHA1 Message Date
55c095f620 Merge pull request #226 from nils-asmussen/fix/Gauss
Fix compiling of MSource::Gauss for single precision
2019-08-14 17:50:38 +01:00
e3966aa49b Fix compiling of MSource::Gauss for single precision 2019-08-12 14:57:11 +01:00
c2c4252a07 Merge pull request #216 from nils-asmussen/feature/GaussianSmearing
feature/gaussian smearing
2019-08-08 12:29:55 +02:00
bca36d9bc3 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-07-30 22:51:23 +01:00
263dcbabab Simplify the comms benchmark 2019-07-30 22:51:04 +01:00
8c6016f717 Merge pull request #219 from mmphys/feature/include
Housekeeping. #include <Grid.h> ---> #include <Grid/Grid.h>
2019-07-29 23:08:01 +01:00
76c704b84b Intrinsics for CLANG are now fixed in v6 2019-07-20 16:52:24 +01:00
671bcbcccb Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-07-19 10:48:22 +01:00
ff325376cb Fix single precision deriv test fail 2019-07-19 10:47:44 +01:00
9e926e3fc5 Build fix in develop 2019-07-19 10:01:52 +01:00
c3d0c176ab cleaning up Kl2 contraction 2019-07-05 16:29:46 +01:00
0a71f8bb10 Merge pull request #222 from guelpers/feature/kl2QEDseq
EMLepton: Multiple source-sink separations at once
2019-07-05 16:22:34 +01:00
3a31ba2ea2 Merge remote-tracking branch 'upstream/develop' into feature/kl2QEDseq 2019-07-03 14:37:56 +01:00
eac6337466 Hadrons: EMLepton: multiple source-sink separations at once 2019-07-03 14:36:34 +01:00
ab7537e002 Merge pull request #221 from fionnoh/bugfix/A2ALoop
Bugfix for A2ALoop module
2019-07-03 14:13:51 +01:00
1059189abf Bugfix for A2ALoop module 2019-06-27 13:49:55 +08:00
c81d3d422d Housekeeping. #include <Grid.h> ---> #include <Grid/Grid.h> 2019-06-03 15:25:05 +01:00
620965781e MSource::Convolution remove test code 2019-06-02 13:44:19 +01:00
9c18638b24 MSource::Convolution let mom argument be Nd dimensional 2019-06-02 13:41:39 +01:00
4bfe678218 MSource::Gauss Integer is unsigned... 2019-06-02 12:36:57 +01:00
fc6e584f2c MSource::Gauss fix sign in exponent of normalization + use correct types 2019-06-02 11:52:05 +01:00
7c3f400fc5 MSource::Gauss add parameters tA and tB 2019-06-02 00:12:15 +01:00
4bca2c17ce MSource::Convolution rename parameters 2019-06-02 00:04:07 +01:00
8d540a4e85 MSource::Gauss add mom parameter + avoid Cshifts 2019-06-01 23:56:14 +01:00
b120ef1fe4 Merge pull request #217 from guelpers/feature/EMlepwall
Hadrons: EMLepton: Wall source
2019-05-30 11:13:27 +02:00
166feb6483 Hadrons: EMLepton: Wall source 2019-05-30 10:07:08 +01:00
f569813b60 remove commented code 2019-05-29 17:07:07 +01:00
0190ada714 Merge branch 'develop' into feature/GaussianSmearing 2019-05-29 17:01:17 +01:00
de1a1dccb3 MSource::Gauss and MSource::Convolution: change LatticeComplex to ComplexField 2019-05-29 16:25:45 +01:00
0b3f40ce16 MSource::Convolution fix sign in Momentum 2019-05-29 16:06:10 +01:00
e35e8da111 Revert "cleaning up Kl2 contraction"
This reverts commit f244fed6ab.
2019-05-29 11:23:17 +02:00
6fdf93d695 move momentum phase from MSource::Gauss to MSource::Convolution 2019-05-28 17:26:55 +01:00
6064f96fde MSource::Gauss remove superfluous comment 2019-05-24 20:18:37 +01:00
4e52e46a2c MSource::Gauss fix missing factor 2019-05-24 20:16:09 +01:00
6b27369ade MSource::Convolution use type PropagatorField 2019-05-24 16:07:08 +01:00
ab2e5f88cd add fields as input (for scheduler) 2019-05-24 15:57:30 +01:00
f244fed6ab cleaning up Kl2 contraction 2019-05-24 13:08:35 +01:00
9b3701ae27 possibility to save/load schedules directly from the application parameters 2019-05-24 13:08:20 +01:00
4ac27340b9 moving VERSION file to the empty ChangeLog one; this creates compilation problems with #include <version> in recent versions of LLVM and case-insensitive FS (typically macOS) 2019-05-24 13:05:17 +01:00
c7c0a1065f Merge pull request #214 from guelpers/feature/kl2QEDseq
Kl2 contraction with sequential propagators
2019-05-23 20:31:41 +01:00
80947130f9 Merge pull request #215 from fionnoh/develop
Added precision tuning to Hadrons parameterfile writing
2019-05-23 18:44:58 +01:00
0aee73ea6b Added precision tuning to Hadrons parameterfile writing 2019-05-23 18:43:25 +01:00
e43d59045e add option mom to MSource::Gauss 2019-05-23 17:33:32 +01:00
e553678599 add modules MSource::Gauss and MSource::Convolution 2019-05-23 16:38:13 +01:00
0290ee1f6d Merge pull request #213 from fionnoh/develop
Added ZFIMPL to SeqConserved module
2019-05-23 13:46:02 +01:00
9a34edcf9f Kl2 QED cleanup 2019-05-23 13:43:22 +01:00
246f10001e Added ZFIMPL to SeqGamma 2019-05-23 12:42:40 +01:00
e675c6a48c Merge remote-tracking branch 'upstream/develop' into feature/kl2QED 2019-05-23 12:41:54 +01:00
a66d110b88 Added ZFIMPL to SeqConserved module 2019-05-23 11:49:54 +01:00
918e673078 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-05-22 09:57:02 +01:00
44b53c3ba2 F1 ensemble running with 96%~ acceptance etc.. 2019-05-22 09:56:26 +01:00
2095c12eac Make detection of HPE 8600 automatic 2019-05-22 09:54:21 +01:00
ae5ad986e2 Merge remote-tracking branch 'upstream/develop' into feature/kl2QED 2019-05-19 14:35:46 +01:00
77ca45ff49 Merge pull request #211 from fionnoh/develop
Enum for gaugefix and bug fix for wall source
2019-05-18 18:57:52 +01:00
dbd7f3f0fc Added variables that were missing from wall source setup 2019-05-17 19:10:09 +01:00
d14512ee03 Exposed a coulomb/landau enum to the gauge fixing module 2019-05-17 19:01:52 +01:00
48b1c806ed Coulomb gauge added as an option 2019-05-17 17:36:32 +01:00
0a8b6724ef Merge pull request #209 from fionnoh/develop
Added gauge transform option to eigpack IO
2019-05-15 18:09:44 +02:00
ce102ac550 More logging, timing, and 4d/5d logic for eigpack gauge transforms 2019-05-15 14:31:25 +01:00
94accec311 Added gauge transform option to eigpack IO 2019-05-15 13:35:47 +01:00
d8512b03f8 Merge pull request #195 from nils-asmussen/fix_GaugeProp_4d
MFermion::GaugeProp fix for 4d fields
2019-05-12 21:31:18 +02:00
d90cf9d022 Merge pull request #207 from fionnoh/develop
Weak Hamiltonian and contraction bug fixes
2019-05-12 21:30:20 +02:00
79e930ba12 Hadrons: Lepton Propagator for kl2, sign swap for antiperiodic boundary 2019-05-10 12:46:18 +01:00
2acd8ece65 Hadron WeakEye and A2ALoop bug fixes, and WWVVContraction bug fix 2019-05-08 10:57:36 +01:00
b638509c61 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-05-08 10:51:04 +01:00
edeb590818 DiskVector: fix of memory bug triggering segfault when the cache is accessed following a certain pattern 2019-05-03 17:09:47 +01:00
4f0631615f A2A Lepton-Meson Field contraction 2019-04-30 12:04:59 +01:00
c2cd0e15d7 Merge remote-tracking branch 'upstream/develop' into feature/kl2QED
Conflicts:
	Grid/qcd/action/fermion/DomainWallFermion.h
	Grid/qcd/action/fermion/FermionOperator.h
2019-04-29 12:07:20 +01:00
df41de4cb6 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-04-24 12:02:50 +01:00
6d0b985697 Verbose 2019-04-24 06:29:52 +01:00
94ebcf551c Iteration range fix 2019-04-24 06:28:14 +01:00
6fd4b0be91 Evolving HMC status 2019-04-23 21:54:45 +01:00
7894ea6263 Now have mixed precision solves in the 2f sector 2019-04-23 21:54:19 +01:00
73d4676997 Action and Deriv solvers allowed to differ 2019-04-23 21:53:44 +01:00
262a73c964 Comment improvement 2019-04-23 21:52:58 +01:00
5921b1d2b9 Layout/whitespace changes 2019-04-23 21:52:33 +01:00
6505efcb57 Set iteration count if guess is already good 2019-04-23 21:51:57 +01:00
b595f58e4c Allow HMC to access matrix 2019-04-23 21:51:23 +01:00
b0de7ab7db Extra do nothing guesser 2019-04-23 21:50:45 +01:00
e1124d9572 Integrator verbosity updates 2019-04-23 21:50:15 +01:00
d416156c16 Mobius 2+1f sign off. 2019-04-19 07:57:08 +01:00
cd8d939a1a Integrator logging on by default 2019-04-19 07:54:17 +01:00
760cfe294c RHMC for mobius 2019-04-19 07:53:54 +01:00
13eaf21b5c HMC make file 2019-04-18 11:53:26 +01:00
1403ab231b Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-04-18 11:06:02 +01:00
0368fbcde8 Update 2019-04-18 11:05:53 +01:00
2dd0ec7862 Merge pull request #186 from djm2131/feature/eofa-bug-fixes
Merge feature/eofa-bug-fixes into develop
2019-04-17 14:54:06 +01:00
f4241e59ba Merge pull request #200 from mmphys/feature/XcodeDoc
Updated documentation after Peter's review.
2019-04-17 14:51:19 +01:00
26b1d2df2d Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-04-17 12:08:06 +01:00
bc14e86812 Simple check 2019-04-17 12:07:42 +01:00
780a67844e Simple checks 2019-04-17 12:07:17 +01:00
8b7805200f Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-04-17 12:05:09 +01:00
2871dec6c0 Mobius HMC 2019-04-17 12:04:57 +01:00
abde12433e Changes locally 2019-04-17 12:03:20 +01:00
1f88ba4e39 Power method 2019-04-17 12:03:05 +01:00
ea5b3ed8a2 Momentum rescaling 2019-04-17 12:01:06 +01:00
a104115c7d Bounds checking 2019-04-17 11:56:46 +01:00
b899042d81 Bounds checking 2019-04-17 11:55:43 +01:00
3e712fe643 Scale momentum convention to CPS/UKQCD MD time 2019-04-17 11:54:17 +01:00
f4723e07c5 Add bounds checking 2019-04-17 11:52:23 +01:00
9ed2d02bb2 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-04-12 12:11:06 +01:00
50d016340c Merge pull request #190 from mmphys/feature/distil-checkin
Eigen::Tensor serialisation. Tested on single and double precision builds
2019-04-10 12:49:06 +01:00
f7b4fd0f69 Make sure Grid::Serializable can write Eigen Tensors to output streams. NB:
1) The Eigen package defines operator<< for Eigen tensors, but this format is different, hence Grid::Serializable::WriteMember
2) For simplification, the contents are written in memory order. I.e. Different results will be obtained depending on whether the tensor is row- or column-major
2019-04-06 15:40:23 +01:00
1f1aa92f14 Updated documentation after Peter's review.
1) Removed version numbers from Grid dependencies
2) Explained in a little more detail how to use Xcode to build Grid and Hadrons libraries
2019-04-06 13:42:39 +01:00
00963a7499 twist and boundary conditions for free propagator 2019-04-05 10:08:27 +01:00
82a77f9960 ... this time without the new Distillation modules ... 2019-04-03 23:02:26 +01:00
00b4139c16 Eigen tensor serialisation fixes after Antonin's review 2019-04-03 22:48:07 +01:00
3e9c757b3b Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-04-03 16:56:48 +01:00
ecf736e6bf Merge pull request #193 from nils-asmussen/fix_GaugeFix_inputmod
Fixes #192 and adds gauge transformation matrix as output.
2019-04-02 18:28:50 +01:00
f22ab5e1bc Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-04-02 11:29:59 +01:00
72f959c0b8 MGauge::GaugeFix use standard convention for fields 2019-03-29 16:51:21 +00:00
63001d3fa6 fix bug: MGauge::GaugeFix should not modify its input 2019-03-29 16:51:11 +00:00
b1d3d1f1a9 add gauge transformation matrix as output to module MGauge/GaugeFix 2019-03-29 16:51:00 +00:00
c2250fa124 MFermion::GaugeProp fix for 4d fields 2019-03-29 16:36:56 +00:00
84940fbdf0 Hadrons: Lepton Propagator for kl2 2019-03-28 10:15:09 +00:00
0a270b3e93 Merge pull request #191 from mmphys/GridXcode
Documentation for using Grid with Xcode on Mac OS
2019-03-27 22:53:35 +00:00
6536bed8a4 Documentation for using Grid with Xcode on Mac OS 2019-03-27 20:51:20 +00:00
79160011a1 endianness fix in resilient IO 2019-03-26 16:06:13 +00:00
47f5b1e2b5 Iterator added. Will wait for review comments before finalising. 2019-03-25 18:19:55 +00:00
a381d34f37 Fix build with Intel '17 compiler, i.e. workaround incorrect auto types for c++ style definitions.
E.g. assuming T::rank is an int, then objects defined like so:
    const auto rank{T::rank};
should also be int. Unfortunately, Intel '17 instead defines them to be std::initializer_list<int>, then proceeds to complain where these variables are used that they cannot be converted to int. NB: This was fixed under Intel '18
2019-03-23 09:24:15 +00:00
f0c2108acf Pushed paboyle's changes: Updates for clang happy 2019-03-22 12:59:14 +00:00
93a5fc083f Updates for clang happy 2019-03-22 11:39:22 +00:00
6d1de8ed2e Merge paboyle's no compile in single precision Intel 2019 fix 2019-03-21 16:48:08 +00:00
116dde31eb No compile in single precision Intel 2019 fix 2019-03-21 14:13:33 +00:00
12d8bf1ced Eigen::Tensor serialisation. Tested on single and double precision builds 2019-03-20 22:27:41 +00:00
d921a99b1a precision fix 2019-03-19 17:07:40 +00:00
9790926cc5 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-03-19 16:35:48 +00:00
a6adb85a1b Merge pull request #185 from mmphys/feature/trait-recommend
Recommendations for Traits classes
2019-03-18 12:22:42 +00:00
98b5b61fea Remove bundled Eigen stuff 2019-03-15 19:44:28 -04:00
6896c57d7c Fix typo so it matches develop 2019-03-15 19:34:36 -04:00
b3d480a978 Remove bundled source from my local repository 2019-03-15 19:17:03 -04:00
bb731c97d6 Slightly generalize interface to SchurRedBlackBase and derived solver classes so we can pass forecasted initial guesses in EOFA heatbath correctly 2019-03-15 19:10:56 -04:00
974003ae96 Fix sign convention of ExactOneFlavourRatioPseudoFermionAction::deriv() to match force conventions for Integrator class 2019-03-15 19:04:29 -04:00
93348775af Resolved merge conflict 2019-03-15 19:01:37 -04:00
91cffef883 Updates after review with Peter. 2019-03-07 14:30:35 +00:00
d3935ae7fc Hadrons: some updates in WeakMesonDecayKl2 2019-03-06 15:27:59 +00:00
0b426bf9f6 Merge remote-tracking branch 'upstream/develop' into feature/kl2QED
Conflicts:
	Hadrons/Modules.hpp
	Hadrons/modules.inc
2019-03-06 11:28:59 +00:00
acd25d0d01 Wilson clover multi grid for lime lattice 2019-03-04 11:30:15 +00:00
b7db99967a Recommendations for Traits classes 2019-02-28 20:06:59 +00:00
b930eda69d Merge branch 'develop' of github.com:paboyle/Grid into develop 2019-02-27 02:27:46 +00:00
7852181c2c Hadrons: uninitialised pointer fix (might have been harmless) 2019-02-27 02:27:40 +00:00
bdf87bc994 Hadrons: beware of the nasty uninitialised twists 2019-02-27 02:27:09 +00:00
136e7b2314 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-02-26 11:31:36 +00:00
1ea64b24fe Smearing test. Test on free field. 2019-02-26 11:31:17 +00:00
8f661f6c05 Smearing for quark observables 2019-02-26 11:31:00 +00:00
ae9e248c95 Smearing 2019-02-26 11:29:12 +00:00
351ffe73cd Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-02-25 14:06:09 +00:00
6160795a43 dt^2 term comments 2019-02-24 15:23:20 +00:00
ded2d5c3ab HMC directory 2019-02-24 15:22:57 +00:00
04255128ef HMC directory 2019-02-24 15:22:17 +00:00
a9a3248cb5 More precision in rect force test 2019-02-24 15:21:19 +00:00
7c461dc664 Bounds checking plan setup 2019-02-24 15:19:48 +00:00
15fddde9bf ConstEE override in Clover 2019-02-24 14:44:43 +00:00
048397d880 Default tau spacing should be longer c.f. Zbigniew Srocinsky thesis 2019-02-24 14:43:22 +00:00
196c9e4a4a Better conformable check with message 2019-02-24 14:42:52 +00:00
6a0823718e Make ConstEE except override in clover 2019-02-24 14:41:59 +00:00
22476cc5a3 Power method estimator of spectral range 2019-02-24 14:37:56 +00:00
cb16c96dc7 Hadrons: XML validator utility 2019-02-22 18:41:26 +00:00
e37614bde4 display relative norm during field IO norm check 2019-02-14 16:23:50 +00:00
042bad2ced possibility to set a build number 2019-02-14 13:58:17 +00:00
5bc0857412 IO norm check on relative norm 2019-02-10 22:12:47 +00:00
b540dc1cee Output field norm check during IO 2019-02-10 21:41:17 +00:00
7672bb6434 Hadrons: random vector utility module I/O 2019-02-10 21:25:25 +00:00
f80c548365 quieter initialisation 2019-02-10 20:47:35 +00:00
c8bcee6e97 Merge pull request #183 from nils-asmussen/fix-eigen-patch
fix patch command for eigen in bootstrap.sh
2019-02-06 14:36:36 +00:00
6e0d43aef5 fix patch command for eigen in bootstrap.sh 2019-02-06 11:25:51 +00:00
c1257208e2 Mres changes and gauge xform mat changes 2019-02-05 23:43:00 +00:00
74c38822ed Hadrons: 32 bit I/O directly in Lanczos module 2019-02-05 21:56:51 +00:00
318c64adc2 Hadrons: copyright update 2019-02-05 19:13:37 +00:00
d5b053f86f Hadrons: 1 propagator loop construction now using A2A vectors 2019-02-05 19:12:38 +00:00
c60e50e3cb Hadrons: copyright update 2019-02-05 18:55:24 +00:00
08d8b1d5fb Hadrons: 4-quark eye 3-pt contractions 2019-02-05 18:53:20 +00:00
90d6d28547 Hadrons: non-eye weak 3pt fix 2019-02-05 11:35:10 +00:00
9c31305b8d Hadrons: test cleaning 2019-02-04 21:26:25 +00:00
2eb584fdf0 Hadrons: 4-quark non-eye 3-pt contractions 2019-02-04 21:24:07 +00:00
6b46834af8 Hadrons: archiving unmaintained or exotic modules 2019-02-04 21:23:30 +00:00
3692c7f1ef Hadrons: type alias cleaning and global correlator class (need to propagate) 2019-02-04 21:21:51 +00:00
0cf94587cd array with all gammas for convenience 2019-02-04 21:20:16 +00:00
68868c83ff Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2019-01-31 14:46:56 +00:00
9b6ddb6e54 Adding a norm of a general field check, so that for things other than gauge configs there is an analogue of plaquette norm.
Improve argument checking in the BinaryIO.h, as there looks to be some corruption issue intermittently on tesseract jobs.
Not clear where the root bug is.
2019-01-16 22:35:58 +00:00
447b772136 Merge remote-tracking branch 'upstream/develop' into feature/kl2QED 2019-01-07 15:09:18 +00:00
91a7fe247b Merge branch 'DanielRichtmann-feature/wilsonmg' into develop 2019-01-02 14:40:31 +00:00
8a1be021d3 Merge branch 'feature/wilsonmg' of https://github.com/DanielRichtmann/Grid into DanielRichtmann-feature/wilsonmg 2019-01-02 14:39:59 +00:00
fd66325321 pure QED test and copyright update 2018-12-14 17:39:11 +00:00
c637c0c48c James H.'s code for general size Wilson loops 2018-12-14 17:37:09 +00:00
c4b472176c Photon code fix 2018-12-14 17:36:38 +00:00
943fa48ce4 Hadrons: Kl2 contraction using sequential propagators 2018-12-14 13:45:30 +00:00
fa97a56fdd Hadrons: sequential Aslash insertion on propagator 2018-12-14 12:40:26 +00:00
856476a890 big cleanup of the Photon class + QED Coulomb gauge 2018-12-13 21:52:38 +00:00
c509bd3fe2 Merge branch 'feature/resilient-io' into develop 2018-12-01 12:57:43 +00:00
49b934310b resilient I/O fix 2018-11-27 20:17:09 +00:00
01e8cf5017 Merge branch 'develop' into feature/resilient-io 2018-11-27 19:09:59 +00:00
12f4499502 HDF5 serialiser fix 2018-11-27 19:09:50 +00:00
05aec72887 Hadrons: application parameter for resilient I/O 2018-11-27 18:46:43 +00:00
136d3802cb binary parallel IO can do read tests and eventually re-write in case of failure 2018-11-27 18:38:24 +00:00
a4c55406ed checksummed HDF5 IO 2018-11-27 17:43:19 +00:00
c7f33ca2a8 Revert "Hadrons: A2A vector write can fail and retry"
This reverts commit 10fc263675.
2018-11-27 17:27:26 +00:00
0e3035c51d Revert "optional non-fatal checksum fail in Lime lattice read (with error codes)"
This reverts commit bccfd4cbb3.
2018-11-27 17:27:20 +00:00
10fc263675 Hadrons: A2A vector write can fail and retry 2018-11-26 19:47:03 +00:00
bccfd4cbb3 optional non-fatal checksum fail in Lime lattice read (with error codes) 2018-11-26 19:45:51 +00:00
0b50d4a328 log time fix 2018-11-23 15:51:27 +00:00
b74940b3d4 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-11-23 14:08:29 +00:00
e232257cb6 Hadrons: A2AAslashVector module cleaning and renaming 2018-11-22 19:43:49 +00:00
09451b5e48 Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-11-22 15:45:24 +00:00
6364aa8acf Merge branch 'feature/contractor' into develop 2018-11-22 15:44:46 +00:00
b9e84ecab7 Hadrons: minor code cleaning 2018-11-22 15:44:30 +00:00
41032fef44 Optional RW mode for Hdf5Reader 2018-11-21 18:36:50 +00:00
d77bc88170 Optional support for faster CRC32C checksum through Intel IPP 2018-11-19 17:21:53 +00:00
494b3c9e57 Hadrons: contractor more IO fix 2018-11-19 16:26:53 +00:00
2ba19a9e07 Hadrons: contractor IO fix 2018-11-19 16:17:51 +00:00
5d7cc29eaf Hadrons: contractor token @traj@ for trajectory number in input file 2018-11-19 16:04:01 +00:00
f22a27d7f9 Hadrons: contractor trajectory loop and file output 2018-11-19 15:45:04 +00:00
33a0bbb17b Const correctness 2018-11-19 11:27:57 +00:00
f592ec8baa Hadrons: contractor performance fix 2018-11-16 20:59:49 +00:00
8b007b5c24 Hadrons: remove the use of OpenMP reductions 2018-11-16 20:00:29 +00:00
17b3f47b1e Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-11-16 16:32:12 +00:00
9bb170576d Merge pull request #177 from guelpers/develop
Hadrons module to electrify a gauge
2018-11-14 16:04:09 +00:00
a7e3977b75 Merge remote-tracking branch 'upstream/develop' into develop 2018-11-13 14:56:23 +00:00
995f20e45d Hadrons: some renamings 2018-11-13 14:54:48 +00:00
d058b4e681 Merge branch 'feature/seqA2A' into develop 2018-11-13 13:27:24 +00:00
8e0d2f3402 Hadrons: support for twisted boundary conditions 2018-11-12 17:16:18 +00:00
2ac57370f1 Hadrons: contractor translation average normalisation 2018-11-12 16:04:35 +00:00
344e832a4e Hadrons: contractor faster transpose and finer timings 2018-11-12 15:59:54 +00:00
cfe281f1a4 Hadrons: diskvectors measure hash performance in debug output 2018-11-12 15:59:11 +00:00
f5422c7334 Hadrons: more contractor instrumentation 2018-11-09 16:23:53 +00:00
68c76a410d Hadrons: more contractor improvements 2018-11-08 19:24:29 +00:00
69b6ba0a73 Hadrons: contractor fixes and improvements 2018-11-08 18:46:28 +00:00
65349b07a7 Hadrons: simpler A2A perf functions 2018-11-08 18:44:44 +00:00
7cd9914f0e Hadrons: automatically resize output in MKL A2A matrix kernels 2018-11-08 17:40:57 +00:00
f3f24b3017 Optional Twisted BC's added, in "DoubleStore" for WilsonImpl.
Untested but doesn't affect answers when twists are all zero. The zero is the default behaviour
for ImplParams.
2018-11-08 12:55:25 +00:00
8ef4657805 Merge remote-tracking branch 'upstream/develop' into feature/seqA2A 2018-11-08 09:00:06 +00:00
78c1086f8b Hadrons: sequential Aslash insertion and propagator on A2A vector 2018-11-08 08:58:09 +00:00
68c13045d6 Added a test for Felix and Michael to look at 2018-11-07 23:40:15 +00:00
e9b6f58fdc Allow shrinking machine in orthog direction for extract slice local 2018-11-07 23:39:18 +00:00
839605c45c Verbose reduce 2018-11-07 23:38:46 +00:00
1ff1422e07 Hadrons: contractor lighter output 2018-11-07 20:02:53 +00:00
32376f0437 Hadrons: contractor performances 2018-11-07 19:59:11 +00:00
0c6e581336 Hadrons: first stab at general contraction code, needs serious testing 2018-11-07 19:16:55 +00:00
e0a79a5bbf Hadrons: PR#177: Electrify gauge: Single Precision fix 2018-11-07 15:01:22 +00:00
4c016cc1a4 Merge remote-tracking branch 'upstream/develop' into develop 2018-11-07 14:03:12 +00:00
2205b1e63e Add CXX to grid-config 2018-11-07 13:32:46 +00:00
6f421c7a6f Block solver in the SchurRedBlack plus timing report cleaner 2018-11-07 12:26:56 +00:00
b62b9ac214 Patch to broken assertion 2018-11-06 22:18:17 +00:00
88d9922e4f Hadrons: fast A2A matrix contraction kernels 2018-11-06 19:49:09 +00:00
9734e3ee58 Hadrons: (somewhat) faster build 2018-11-06 19:47:41 +00:00
8c3a599148 Block solver test 2018-11-06 16:44:58 +00:00
4a47b11876 Block CG improvements to develop 2018-11-06 12:49:05 +00:00
f1382cf81d Merge remote-tracking branch 'upstream/develop' into develop 2018-11-06 10:29:52 +00:00
85699daef2 Hadrons: Module to electrify a gauge field 2018-11-06 10:27:18 +00:00
1651111d18 Hadrons: final, portable form of the contractor benchmark 2018-11-05 21:29:13 +00:00
1ed4ea344d Merge branch 'develop' into feature/contractor 2018-11-05 11:42:02 +00:00
8f514ae550 Hadrons: Lanczos 32bit IO 2018-11-05 11:41:10 +00:00
4a7415e83c Hadrons: contractor benchmark update 2018-10-23 21:00:54 +01:00
0ffcfea724 Hadrons: contractor benchmark 2018-10-23 17:08:16 +01:00
febe41cc1d Hadrons: improvement on PR #176 2018-10-23 12:48:15 +01:00
62173395b8 Merge pull request #176 from guelpers/develop
Hadrons: full volume noise source for A2A
2018-10-23 12:29:35 +01:00
b48611b80f Merge branch 'develop' into feature/contractor 2018-10-22 18:27:18 +01:00
6b559d68aa Hadrons: eigenpack converter can do test reads 2018-10-22 11:10:18 +01:00
1982cc58dd Hadrons: A2A vectors I/O filename fix 2018-10-21 01:20:05 +01:00
2e2e5ce596 SciDAC I/O print data checksums 2018-10-19 20:36:32 +01:00
7d84dca8e9 Merge branch 'develop' into feature/contractor 2018-10-18 23:46:58 +01:00
2d3916418e Hadrons: more precision fix 2018-10-18 23:45:13 +01:00
21304e2139 Hadrons: fix to allow single-prec build again 2018-10-18 19:58:50 +01:00
7b850eb48b Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-10-18 19:46:25 +01:00
a3ace57e01 Hadrons copyright update 2018-10-18 19:46:11 +01:00
b1c3cbe35e Hadrons: A2A vectors I/O 2018-10-18 19:44:58 +01:00
f31d6bfec2 Hadrons: contractor cleaning and better error check 2018-10-18 17:50:35 +01:00
a7cfa26901 Hadrons: reverse A2A matrix load for better DiskVector cache reuse 2018-10-18 17:50:16 +01:00
f333f3e575 Hadrons: DiskVector save-on-eviction and faster CRC32 for Eigen matrices 2018-10-18 17:48:25 +01:00
2b4e253473 Merge branch 'develop' of github.com:paboyle/Grid into develop 2018-10-17 20:28:20 +01:00
0ba3d469c7 Benchmark IO in single and double precision 2018-10-17 20:27:34 +01:00
f709329d96 Hadrons: first version of a contractor utility 2018-10-17 20:26:48 +01:00
f05b25dae4 Hadrons: A2AMatrix load 2018-10-17 20:26:26 +01:00
3e1d268fa3 Hadrons: DiskVector optimisation 2018-10-17 20:25:32 +01:00
109c74bed8 Hadrons: full volume noise source for A2A 2018-10-16 14:56:12 +01:00
3023287fd9 Hadrons: 3-index RO access to Eigen disk vector 2018-10-16 14:44:14 +01:00
b3d6805638 Merge branch 'feature/contractor' into develop 2018-10-16 11:29:37 +01:00
291bc2a1f0 IO benchmark on a list of directories 2018-10-15 17:25:08 +01:00
2f368c33fc Hadrons: copyright update 2018-10-15 15:51:45 +01:00
9592115341 Hadrons: NPR and gauge fixing linking fix 2018-10-15 15:49:42 +01:00
24c07694bc Mixed precision now supported in MADWF 2018-10-14 00:22:52 +01:00
f0229025e2 MADWF working across a range of actions 2018-10-13 19:55:03 +01:00
6de9a45a09 NPR first cut by Julia Kettle 2018-10-12 11:00:58 +01:00
03c3d495a2 First cut (non functional NPR code) developed by Julia Kettle 2018-10-12 10:59:33 +01:00
49f25e08e8 PauliVillars based 4D -> 5D reconstruction with Fourier Accelerated PV inverse
by Christoph. Differs from the one by Rudy in BFM since it vectorises the twisted
4D solves in pairs.
2018-10-11 12:35:32 +01:00
efc0c65056 Hadrons: DiskVector Eigen specialisation with binary I/O and sha256 correctness check 2018-10-08 19:02:00 +01:00
936eaac8e1 function to get the sha256 string 2018-10-08 19:00:50 +01:00
fe6a372f75 Hadrons: fixes and cleaning in the scalar SU(N) part 2018-10-08 15:14:08 +01:00
dac9f8622e Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-10-08 10:12:11 +01:00
148fc052bd Hadrons: Aslash field, tested 2018-10-05 21:04:10 +01:00
c073341a10 Hadrons: more cleaning 2018-10-05 19:50:41 +01:00
78299daaac Hadrons: code cleaning 2018-10-05 16:47:52 +01:00
866449c804 Hadrons: integration of Peter's A2Autils 2018-10-05 16:42:44 +01:00
d69a52079f Merge remote-tracking branch 'gh/feature/a2a-integration' into feature/aslashfield 2018-10-05 15:39:09 +01:00
9f4f8a14a3 Hadrons: code cleaning 2018-10-05 15:38:01 +01:00
f6593dc881 Hadrons: A2A block performance counter fix 2018-10-05 15:11:01 +01:00
58567fc650 Hadrons: big update abstracting the block meson field routine, tested & working, performance counters broken and code dirty 2018-10-04 20:01:49 +01:00
d0b21bf1ff Merge branch 'feature/eigenpack-convert' into develop 2018-10-04 18:26:45 +01:00
a1825d1f59 Hadrons: final fix for multiprec eigenpacks 2018-10-04 18:25:26 +01:00
5a3e83ff7b Hadrons: new layer in eigenpacks class hierarchy 2018-10-03 14:45:01 +01:00
52569d98d8 Hadrons: multiprec eigenpack I/O fix 2018-10-03 14:24:43 +01:00
b351103c29 Hadrons: eigenpack load module with 32bit I/O 2018-10-02 21:07:56 +01:00
118cca4681 Hadrons: linking fix 2018-10-02 20:08:49 +01:00
44de727cd2 Hadrons: eigenpack support for multiprecision I/O 2018-10-02 19:51:09 +01:00
d9de8fd5c9 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2018-09-17 12:19:47 +01:00
7e3647246c Merge branch 'master' of https://github.com/paboyle/Grid into develop 2018-09-17 12:19:20 +01:00
2881b3e8e5 WilsonMG: Remove unnecessary static assertions 2018-06-26 14:42:30 +02:00
cc5d025ea4 WilsonMG: Adapt staggered GMRES/MR tests to "new" constructor 2018-06-18 16:20:20 +02:00
ddcb53bce2 Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-06-13 09:50:37 +02:00
d1c80e1d46 WilsonMG: Correct years in copyright line 2018-06-13 09:44:09 +02:00
c73cc7d354 WilsonMG: Add tests with MG preconditioner running single precision, outer solver running in double 2018-06-12 16:10:48 +02:00
49fdc324a0 WilsonMG: Make MG correctness checks abort on failing tests 2018-06-12 16:10:48 +02:00
f32714a2d1 WilsonMG: Make running MG correctness checks optional via commandline 2018-06-12 16:10:48 +02:00
73a955be20 WilsonMG: Move tests for Wilson & WilsonClover into separate files 2018-06-12 16:10:48 +02:00
66b7a0f871 WilsonMG: Move multigrid class to separate file 2018-06-12 16:10:48 +02:00
2ab9d4bc56 WilsonMG: Fix random behavior in GMRES
From time to time I saw random behavior since the basis vectors were not initialized
properly.
2018-06-12 15:01:31 +02:00
4f41cd114d WilsonMG: Add a mixed precision version of FGMRES
This version does everything in double prec but accepts a preconditioner working
in single precision.
2018-06-12 15:01:31 +02:00
11c4f5e32c WilsonMG: Provide command line switch for reading in input xml + move default params to constructor of MultiGridParams 2018-06-12 15:01:31 +02:00
e9b9550298 WilsonMG: Fix incompatibility with single prec MG in construction of simd layout on coarser grids 2018-06-12 15:01:31 +02:00
7564fedf68 WilsonMG: Set subspace to zero to avoid random behavior 2018-06-12 15:01:31 +02:00
251b904a28 Merge branch 'release/ISC-freeze-2' 2018-06-04 21:09:48 +01:00
5a112feac3 Merge branch 'release/ISC-freeze-1' 2018-06-04 18:49:40 +01:00
6c27c72585 WilsonMG: Provide more sensible default values for MG parameters 2018-05-16 17:26:09 +02:00
9c003d2d72 WilsonMG: Base wilson mg preconditioner entirely on existing infrastructure 2018-05-16 17:26:09 +02:00
4b8710970c WilsonMG: Switch to Galerkin coarsening in CoarsenedMatrix 2018-05-16 17:26:09 +02:00
68d686ec38 WilsonMG: Add functionality for applying G5 on coarse grids 2018-05-16 16:17:14 +02:00
c48b69ca81 WilsonMG: Implement Mdir & Mdiag in CoarsenedMatrix 2018-05-16 16:08:05 +02:00
df8c208f5c WilsonMG: Revert CoarsenedMatrix.h and Lattice_transfer.h back to state of develop branch 2018-05-16 16:02:54 +02:00
61812ab7f1 Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-05-15 14:57:18 +02:00
73ced656eb Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-04-03 17:51:11 +02:00
f69008edf1 WilsonMG: Add functionality to report timings to MG preconditioner 2018-04-03 17:26:49 +02:00
57a49ed22f WilsonMG: Read in MG parameters from xml in test 2018-04-03 16:03:11 +02:00
ff6413a764 WilsonMG: Make number of levels chooseable at runtime
I don't like this solution though :(
2018-04-03 15:57:33 +02:00
2530bfed01 WilsonMG: Move params instance from global scope to test main function 2018-04-03 14:50:48 +02:00
74f79c5ac7 Revert "Add function to return full type as std::string"
This reverts commit 1cb745c8dc.
2018-03-29 12:03:50 +02:00
58c30c0cb1 WilsonMG: Add conformability checks in MG preconditioner 2018-03-28 13:24:39 +02:00
917a92118a WilsonMG: Move operator test to MG testing routine 2018-03-28 12:19:25 +02:00
04f9cf088d WilsonMG: Add more parameters to MultiGridParams struct 2018-03-27 17:13:11 +02:00
99107038f9 WilsonMG: Rationalize the level counting strategy 2018-03-27 17:06:33 +02:00
b78456bdf4 WilsonMG: Get rid of explicit include of GCR header 2018-03-26 15:41:53 +02:00
08543b6b11 WilsonMG: Provide a switch between V- and K-cycle 2018-03-26 15:37:17 +02:00
63ba33371f WilsonMG: Some minor refactoring 2018-03-26 15:34:53 +02:00
683a7d2ddd WilsonMG: Move comment to make clang-format happy 2018-03-26 14:59:40 +02:00
afdcbf79d1 Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-03-23 21:13:50 +01:00
3c3ec4e267 WilsonMG: Move tests for Wilson & WilsonClover into the same file 2018-03-23 21:12:27 +01:00
bbe1d5b49e WilsonMG: Temporarily use GMRES in construction of basis vectors
This can go back to CG once Mdag in CoarsenedMatrix works.
2018-03-23 20:02:27 +01:00
0f6009a29f WilsonMG: Huge refactor into something that could be considered an algorithm 2018-03-23 19:55:43 +01:00
1cfed3de7c WilsonMG: Add new logger for MG 2018-03-23 19:55:16 +01:00
edbc0d49d7 WilsonMG: Get rid of explicit GridTypeMappers in CoarsenedMatrix 2018-03-22 16:38:24 +01:00
ee5cf6c8c5 WilsonMG: Some minor changes to GMRES implementations 2018-03-16 13:10:45 +01:00
a66cecc509 WilsonMG: Fix invalid call to MR ctor 2018-03-09 17:34:29 +01:00
0f6cdf3d4b WilsonMG: Implement missing parts of CoarsenedMatrix 2018-03-09 16:56:16 +01:00
1e63b73a14 WilsonMG: Some cleanup/formatting 2018-03-09 16:50:19 +01:00
6ab60c5b70 Merge remote-tracking branch 'upstream/develop' into feature/wilsonmg 2018-02-08 23:59:07 +01:00
8c692b7ffd WilsonMG: Comment assertion on hermiticity of coarse operator for now
TODO: Think of a way to not break dwf_hdcr by doing that. It's only an assertion
but it still interferes with it.
2018-02-08 23:55:05 +01:00
2976132bdd Add first version of multigrid for wilson clover analogous to wilson one
Just like the wilson one, this algorithm

• is currently only a 2-level method since I don't have correct implementations
  for Mdir and Mdiag in CoarsenedMatrix yet (needed for further coarsening)
• needs levelization and refactoring into a proper algorithm
2018-02-08 23:52:10 +01:00
48177f2f2d Add tests for all MR|GMRES solvers with wilson clover action 2018-02-08 23:52:09 +01:00
c4ce70a821 WilsonMG: Major cleanup 2018-02-08 23:52:08 +01:00
a3e009ba54 Add tests for CAGMRES solvers with staggered action 2018-02-08 17:46:28 +01:00
eb7cf239d9 Print warning messages in CAGMRES solvers
Currently, the implementation of these algorithms doesn't differ from their non
communication-avoiding versions.
2018-02-08 17:43:47 +01:00
13ae371ef8 Make solver parameters match in all MR|GMRES solver tests 2018-02-08 17:33:10 +01:00
9f79a87102 Fix bugs in Flexible GMRES solvers
Somehow I got the left and right-preconditioned versions of GMRES mixed up. As
of now this is the right-preconditioned version, which is what we want.
2018-02-08 16:00:31 +01:00
4ded1ceeb0 Make GMRES solvers perform no more than MaxIterations steps
I noticed that it was possible to overrun this number.
2018-02-08 15:29:44 +01:00
8bc12e0ce1 Remove superfluous comments in MR solver 2018-02-07 18:09:09 +01:00
cc2f00f827 Remove test for MR solver with dwf action as it doesn't converge 2018-02-07 18:09:08 +01:00
cd61e2e6d6 Increase max iterations in test of MR solver with staggered action 2018-02-07 18:09:07 +01:00
323ed1a588 Add an overrelaxation parameter to the MR solver 2018-02-07 18:09:06 +01:00
68c66d2e4b Remove empty line in output of *Residual* solvers 2018-02-07 18:08:56 +01:00
1671adfd49 WilsonMG: Add some tests for linear operators 2018-02-07 17:15:22 +01:00
871649238c WilsonMG: Stricter naming for linear operators 2018-02-01 14:43:08 +01:00
7c86d2085b WilsonMG: Some minor cleanup 2018-02-01 12:24:16 +01:00
9292be0b69 WilsonMG: Add check for Mdiag + Σ Mdir == M
Need to test my implementations of CoarsenedMatrix::Mdiag &
CoarsenedMatrix::Mdir.
2018-01-31 14:03:30 +01:00
10141f90c9 WilsonMG: Rename test file 2018-01-30 10:25:09 +01:00
a414430817 Merge remote-tracking branch 'upstream/develop' into feature/ddalphaamg 2018-01-29 18:32:31 +01:00
f20728baa9 WilsonMG: Some further steps towards a three level method
Currently this is very "manual" as we are still testing stuff. Will refactor
and make it an algorithm once everything works.

What currently does work:

  - All tests in MultiGridPreconditioner::runChecks for the first coarse grid
  - The tests for the intergrid operators going from the first to the second
    coarse grid
    - (1 - P R) v   == 0
    - (1 - R P) v_c == 0
  - A full solve with VPGCR and a two-level MG preconditioner

What hinders the rest of the tests from passing with a three-level method is the
absence of implementations of CoarsenedMatrix::Mdir and CoarsenedMatrix::Mdiag.
2018-01-29 18:29:49 +01:00
d2e68c4355 WilsonMG: Perform some minor cleanup 2018-01-29 18:07:10 +01:00
1cb745c8dc Add function to return full type as std::string
Also works for nested templates. I find it useful for debugging.
Possible usage:

std::cout << "getTypename<AType>() = " << getTypename<Atype>() << std::endl;
std::cout << "getTypename<decltype(AnInstance)>() = " << getTypename<decltype(AnInstance)>() << std::endl;
2018-01-29 17:39:19 +01:00
faf4278019 Use 2 passes of GS in coarse operator construction 2018-01-29 17:21:42 +01:00
194e4b94bb Make MG checking function work level-wise 2018-01-29 17:18:20 +01:00
bfc1411c1f Use more iterations in subspace creation 2018-01-29 17:11:29 +01:00
161637e573 Turn on orthogonality checking temporarily 2018-01-29 17:10:05 +01:00
04f92ccddf WilsonMG: Provide a fix for the previous commit; compiles and runs successfully now
I don't like the solution with the temporary very much though ...
2018-01-22 14:56:48 +01:00
3b2d805398 WilsonMG: Some first steps towards coarse spin dofs; not compiling yet
A failing conversion from the innermost type (Grid::Simd<...>) to a coarse
scalar (triple iScalar) in blockPromote prohibits this commit from working.
2018-01-22 12:45:51 +01:00
9dc885d297 Fix a bug in Wilson MG
The calculation of the lattice size of a second coarse level was incorrect.
2018-01-18 17:02:04 +01:00
a70c1feecc Remove some unnecessary stuff in Wilson MG 2018-01-18 15:48:28 +01:00
38328100c9 Implement correctness checks for Wilson MG 2018-01-18 15:43:15 +01:00
9732519c41 Apply clang-format to Wilson MG
I can provide the configuration file I used if people want that.
2018-01-18 15:14:37 +01:00
fa4eeb28c4 Save current state in Wilson MG test file 2018-01-17 17:56:34 +01:00
10f7a17ae4 Make timing in VPGCR more detailed 2018-01-11 13:42:18 +01:00
26f14d7dd7 Adapt output format of non-herm solvers to the one of VPGCR 2018-01-11 13:36:30 +01:00
73434db636 Merge remote-tracking branch 'upstream/develop' into feature/ddalphaamg 2018-01-09 10:43:33 +01:00
c6411f8514 Merge remote-tracking branch 'upstream/develop' into feature/ddalphaamg 2018-01-08 10:37:10 +01:00
6cf635d61c Remove some old code in Wilson MG 2017-12-22 13:20:09 +01:00
39558cce52 Multiply TVs in Wilson MG with G5 instead of G5R5 2017-12-22 13:07:56 +01:00
df152648d6 Fix error in MR code when compiling for single precision 2017-12-06 18:00:58 +01:00
7a0c9e84f8 Fix HDF5 src 2017-11-29 18:03:03 -05:00
caf1a3c85d Also add HDF5 src 2017-11-29 17:58:02 -05:00
21ca730a49 Also import some dependencies 2017-11-29 17:34:40 -05:00
c6cd27e9b2 Also import Eigen.inc 2017-11-29 17:26:20 -05:00
6068411d61 Remove Eigen from gitignore 2017-11-29 17:24:40 -05:00
4e965c168e Implement analogon to test vector analysis in WMG codebase 2017-11-29 15:05:27 +01:00
f260af546e Save current state 2017-11-28 15:03:02 +01:00
649b8c9aca Save current state 2017-11-24 10:46:20 +01:00
0afa22747d Merge remote-tracking branch 'upstream/develop' into feature/new-solver-algorithms 2017-11-24 10:11:42 +01:00
fa43206c79 Remove some empty lines 2017-11-10 13:48:38 +01:00
a367835bf2 Set everything up for the implementation of FCAGMRES
The current implementation is the exact same code as normal FGMRES. This commit
only sets up the "framework" for the implementation of FCAGMRES, i.e., a test
and an include in the algorithms header file.
2017-11-09 17:30:41 +01:00
d7743591ea Fix some minor formatting errors 2017-11-09 17:28:19 +01:00
c6cbe533ea Set everything up for the implementation of CAGMRES
The current implementation is the exact same code as normal GMRES. This commit
only sets up the "framework" for the implementation of CAGMRES, i.e., a test and
an include in the algorithms header file.
2017-11-09 17:14:44 +01:00
8402ab6cf9 Some minor formatting improvements 2017-11-09 12:52:04 +01:00
c63095345e Remove some superfluous comments 2017-11-09 12:47:20 +01:00
a7ae46b61e Remove some comments 2017-11-08 16:58:20 +01:00
cd63052205 Remove everything preconditioner-related in GMRES code 2017-11-08 16:57:40 +01:00
699d537cd6 Add FGMRES test with staggered fermions 2017-11-08 16:56:42 +01:00
9031f0ed95 Fix a filename in a file header 2017-11-08 16:42:26 +01:00
26b3d441bb Check in forgotten FGMRES test with wilson Fermions 2017-11-08 16:39:11 +01:00
99bc4cde56 Fix an implementation error in FGMRES 2017-11-08 16:38:34 +01:00
e843d83d9d Make z in FGMRES a single Field 2017-11-08 16:38:16 +01:00
0f75ea52b7 First version of FGMRES; not working yet 2017-11-08 16:17:18 +01:00
8107b785cc Rename misunderstood "rsd_sq" to "rsq" in GMRES code 2017-11-08 14:40:03 +01:00
37b777d801 Add test for GMRES solver with staggered fermions 2017-11-08 14:28:48 +01:00
7382787856 Some minor changes 2017-11-08 14:23:55 +01:00
781c611ca0 Perform minor code style fix 2017-11-08 14:22:38 +01:00
b069090b52 Remove a superfluous comment 2017-11-08 13:58:02 +01:00
0c1c1d9900 Set precision and formatting only once in MR code 2017-11-08 13:57:06 +01:00
7f4ed6c2e5 First working version of GMRES + a test for Wilson fermions 2017-11-08 13:56:41 +01:00
56d32a4afb Rename misunderstood "rsd_sq" to "rsq" in MR code 2017-11-08 13:51:08 +01:00
b8ee496ed6 Print some info at start of GMRES 2017-11-08 13:23:41 +01:00
b87416dac4 Fix error with conformable 2017-11-07 15:00:08 +01:00
176bf37372 Remove some commented stuff 2017-11-07 14:57:36 +01:00
b3d342ca22 Remove old implementation of GMRES operator 2017-11-07 10:24:49 +01:00
e1f928398d Save current state 2017-11-07 10:22:41 +01:00
8c579d2d4a Save current state 2017-11-06 18:09:48 +01:00
fc7d07ade0 Correct function signature of body of GMRES outer loop 2017-11-06 17:12:38 +01:00
b3be9195b4 Save one lattice fermion in GMRES code 2017-11-06 17:12:23 +01:00
9e3c187a4d Save current state 2017-11-06 17:05:25 +01:00
8363edfcdb Perform some minor changes to GMRES code 2017-11-06 16:17:44 +01:00
74af31564f Adapt style of wilson GMRES test to style of wilson MR test 2017-11-06 14:06:45 +01:00
e0819d395f Merge remote-tracking branch 'upstream/develop' into feature/new-solver-algorithms 2017-11-06 13:09:36 +01:00
6f81906b00 Add test for the MR solver with staggered fermions; does not converge atm
TODO: Is this a property of staggered or did I do something wrong?
2017-10-30 16:57:55 +01:00
a2d83d4f3d Add test for the MR solver with DW fermions; does not converge atm
TODO: Is this a property of DWF or did I do something wrong?
2017-10-30 16:39:30 +01:00
89bacb0470 Fix path in MR solver header commentary 2017-10-30 16:16:55 +01:00
19010ff66a Merge remote-tracking branch 'upstream/develop' into feature/new-solver-algorithms 2017-10-30 13:16:46 +01:00
5a477ed29e Perform minor style correction 2017-10-27 14:46:18 +02:00
54128d579a Make MR a bit more verbose 2017-10-27 14:45:29 +02:00
e7b1933e88 Add a test for the MR solver 2017-10-27 14:38:57 +02:00
1bad64ac6a Some formatting 2017-10-27 14:35:04 +02:00
15dfa9f663 Change stopping criterion implementation in MR solver + some cleanup 2017-10-27 14:33:25 +02:00
2185b0d651 Correct author in the file 2017-10-27 14:32:38 +02:00
f61c0b5d03 Very early version of MR solver 2017-10-27 14:09:02 +02:00
074db32e54 Fix build of gmres test 2017-10-27 14:08:48 +02:00
d5f661ba70 Save intermediate state 2017-10-25 10:38:26 +02:00
1ab8d5cc13 Save two more files 2017-10-24 16:58:05 +02:00
789e892865 Save current state 2017-10-24 16:58:04 +02:00
53cfa44d7a Save current state 2017-10-24 16:58:03 +02:00
357 changed files with 18481 additions and 4936 deletions

1
.gitignore vendored
View File

@ -114,3 +114,4 @@ gh-pages/
#####################
Grid/qcd/spin/gamma-gen/*.h
Grid/qcd/spin/gamma-gen/*.cc
Grid/util/Version.h

View File

@ -0,0 +1,5 @@
Version : 0.8.0
- Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended
- MPI and MPI3 comms optimisations for KNL and OPA finished
- Half precision comms

View File

@ -42,6 +42,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/GridQCDcore.h>
#include <Grid/qcd/action/Action.h>
#include <Grid/qcd/utils/GaugeFix.h>
#include <Grid/qcd/utils/CovariantSmearing.h>
#include <Grid/qcd/smearing/Smearing.h>
#include <Grid/parallelIO/MetaData.h>
#include <Grid/qcd/hmc/HMC_aggregate.h>

View File

@ -48,14 +48,16 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
#include <Grid/algorithms/iterative/MinimalResidual.h>
#include <Grid/algorithms/iterative/GeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/iterative/PowerMethod.h>
#include <Grid/algorithms/CoarsenedMatrix.h>
#include <Grid/algorithms/FFT.h>
// EigCg
// Pcg
// Hdcg
// GCR
// etc..
#endif

View File

@ -211,6 +211,7 @@ namespace Grid {
for(int b=0;b<nn;b++){
subspace[b] = zero;
gaussian(RNG,noise);
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
@ -295,13 +296,58 @@ namespace Grid {
return norm2(out);
};
RealD Mdag (const CoarseVector &in, CoarseVector &out){
return M(in,out);
RealD Mdag (const CoarseVector &in, CoarseVector &out){
// // corresponds to Petrov-Galerkin coarsening
// return M(in,out);
// corresponds to Galerkin coarsening
CoarseVector tmp(Grid());
G5C(tmp, in);
M(tmp, out);
G5C(out, out);
return norm2(out);
};
// Defer support for further coarsening for now
void Mdiag (const CoarseVector &in, CoarseVector &out){};
void Mdir (const CoarseVector &in, CoarseVector &out,int dir, int disp){};
// Apply a single stencil point of the coarse operator: out = A[point] * (neighbour of in).
//   in   : coarse input vector (its halo is exchanged before use)
//   out  : coarse output vector, overwritten site by site
//   dir  : direction selecting the stencil point
//   disp : displacement selecting the stencil point; (dir == 0, disp == 0) selects point 8
void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
// All three grids must agree before any site indexing is done.
conformable(_grid,in._grid);
conformable(in._grid,out._grid);
SimpleCompressor<siteVector> compressor;
Stencil.HaloExchange(in,compressor);
// Map (dir, disp) onto a stencil point index: disp == +1 gives 2*dir, disp == -1 gives 2*dir+1.
// NOTE(review): the hard-coded 8 presumably is the self-coupling point of a 9-point
// (four-dimensional) stencil -- confirm against the stencil geometry before using other dimensions.
auto point = [dir, disp](){
if(dir == 0 and disp == 0)
return 8;
else
return (4 * dir + 1 - disp) / 2;
}();
parallel_for(int ss=0;ss<Grid()->oSites();ss++){
siteVector res = zero;
siteVector nbr;
int ptype;
StencilEntry *SE;
SE=Stencil.GetEntry(ptype,point,ss);
// Fetch the neighbour: local data (permuted when the entry asks for it) or the comms buffer.
if(SE->_is_local&&SE->_permute) {
permute(nbr,in._odata[SE->_offset],ptype);
} else if(SE->_is_local) {
nbr = in._odata[SE->_offset];
} else {
nbr = Stencil.CommBuf()[SE->_offset];
}
// Only the selected point contributes; the other stencil points are not summed here.
res = res + A[point]._odata[ss]*nbr;
vstream(out._odata[ss],res);
}
};
// Apply only the diagonal (self-coupling) term of the coarse operator to `in`, writing `out`.
// Implemented by forwarding to Mdir with the special (dir, disp) = (0, 0) selector.
void Mdiag(const CoarseVector &in, CoarseVector &out){
Mdir(in, out, 0, 0); // use the self coupling (= last) point of the stencil
};
CoarsenedMatrix(GridCartesian &CoarseGrid) :
@ -417,7 +463,7 @@ namespace Grid {
std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
#endif
// ForceHermitian();
AssertHermitian();
// AssertHermitian();
// ForceDiagonal();
}
void ForceDiagonal(void) {

View File

@ -178,7 +178,7 @@ namespace Grid {
//////////////////////////////////////////////////////////
template<class Field>
class SchurOperatorBase : public LinearOperatorBase<Field> {
class SchurOperatorBase : public LinearOperatorBase<Field> {
public:
virtual RealD Mpc (const Field &in, Field &out) =0;
virtual RealD MpcDag (const Field &in, Field &out) =0;
@ -211,10 +211,9 @@ namespace Grid {
}
};
template<class Matrix,class Field>
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
protected:
Matrix &_Mat;
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
public:
Matrix &_Mat;
SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
virtual RealD Mpc (const Field &in, Field &out) {
Field tmp(in._grid);
@ -380,6 +379,12 @@ namespace Grid {
// Abstract interface for a function of a linear operator (e.g. a solver) acting on fields.
template<class Field> class OperatorFunction {
public:
  // Apply the function of Linop to a single field: reads `in`, writes `out`.
  virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;

  // Apply the function of Linop to a set of fields.
  // Default behaviour: call the single-field operator() once per entry, in order.
  // `in` and `out` must have the same length (asserted).
  virtual void operator() (LinearOperatorBase<Field> &Linop, const std::vector<Field> &in,std::vector<Field> &out) {
    assert(in.size()==out.size());
    // Index with the container's own unsigned size type to avoid a signed/unsigned comparison.
    typedef typename std::vector<Field>::size_type index_t;
    for(index_t k=0;k<in.size();k++){
      (*this)(Linop,in[k],out[k]);
    }
  };
};
template<class Field> class LinearFunction {
@ -421,7 +426,7 @@ namespace Grid {
// Hermitian operator Linear function and operator function
////////////////////////////////////////////////////////////////////////////////////////////
template<class Field>
class HermOpOperatorFunction : public OperatorFunction<Field> {
class HermOpOperatorFunction : public OperatorFunction<Field> {
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
Linop.HermOp(in,out);
};

View File

@ -55,6 +55,14 @@ namespace Grid {
template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrixBase<Field> {
public:
virtual GridBase *RedBlackGrid(void)=0;
//////////////////////////////////////////////////////////////////////
// Query the even even properties to make algorithmic decisions
//////////////////////////////////////////////////////////////////////
virtual RealD Mass(void) { return 0.0; };
virtual int ConstEE(void) { return 1; }; // Disable assumptions unless overridden
virtual int isTrivialEE(void) { return 0; }; // by a derived class that knows better
// half checkerboard operations
virtual void Meooe (const Field &in, Field &out)=0;
virtual void Mooee (const Field &in, Field &out)=0;

View File

@ -33,7 +33,7 @@ directory
namespace Grid {
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS };
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS, BlockCGVec, BlockCGrQVec };
//////////////////////////////////////////////////////////////////////////
// Block conjugate gradient. Dimension zero should be the block direction
@ -42,7 +42,6 @@ template <class Field>
class BlockConjugateGradient : public OperatorFunction<Field> {
public:
typedef typename Field::scalar_type scomplex;
int blockDim ;
@ -54,21 +53,15 @@ class BlockConjugateGradient : public OperatorFunction<Field> {
RealD Tolerance;
Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
Integer PrintInterval; //GridLogMessages or Iterative
BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
: Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv)
: Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv),PrintInterval(100)
{};
////////////////////////////////////////////////////////////////////////////////////////////////////
// Thin QR factorisation (google it)
////////////////////////////////////////////////////////////////////////////////////////////////////
void ThinQRfact (Eigen::MatrixXcd &m_rr,
Eigen::MatrixXcd &C,
Eigen::MatrixXcd &Cinv,
Field & Q,
const Field & R)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
////////////////////////////////////////////////////////////////////////////////////////////////////
//Dimensions
// R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock
@ -85,22 +78,20 @@ void ThinQRfact (Eigen::MatrixXcd &m_rr,
// Cdag C = Rdag R ; passes.
// QdagQ = 1 ; passes
////////////////////////////////////////////////////////////////////////////////////////////////////
void ThinQRfact (Eigen::MatrixXcd &m_rr,
Eigen::MatrixXcd &C,
Eigen::MatrixXcd &Cinv,
Field & Q,
const Field & R)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
sliceInnerProductMatrix(m_rr,R,R,Orthog);
// Force manifest hermitian to avoid rounding related
m_rr = 0.5*(m_rr+m_rr.adjoint());
#if 0
std::cout << " Calling Cholesky ldlt on m_rr " << m_rr <<std::endl;
Eigen::MatrixXcd L_ldlt = m_rr.ldlt().matrixL();
std::cout << " Called Cholesky ldlt on m_rr " << L_ldlt <<std::endl;
auto D_ldlt = m_rr.ldlt().vectorD();
std::cout << " Called Cholesky ldlt on m_rr " << D_ldlt <<std::endl;
#endif
// std::cout << " Calling Cholesky llt on m_rr " <<std::endl;
Eigen::MatrixXcd L = m_rr.llt().matrixL();
// std::cout << " Called Cholesky llt on m_rr " << L <<std::endl;
C = L.adjoint();
Cinv = C.inverse();
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -112,6 +103,25 @@ void ThinQRfact (Eigen::MatrixXcd &m_rr,
////////////////////////////////////////////////////////////////////////////////////////////////////
sliceMulMatrix(Q,Cinv,R,Orthog);
}
// see comments above
// Thin QR factorisation for a block of vectors held as std::vector<Field>.
//   m_rr : output, symmetrised matrix filled by InnerProductMatrix(m_rr,R,R)
//   C    : output, adjoint of the lower Cholesky (llt) factor of m_rr
//   Cinv : output, inverse of C
//   Q    : output, filled by MulMatrix(Q,Cinv,R)
//   R    : input block of vectors to factorise
void ThinQRfact (Eigen::MatrixXcd &m_rr,
Eigen::MatrixXcd &C,
Eigen::MatrixXcd &Cinv,
std::vector<Field> & Q,
const std::vector<Field> & R)
{
InnerProductMatrix(m_rr,R,R);
// Average with the adjoint so m_rr is exactly hermitian before the Cholesky step.
m_rr = 0.5*(m_rr+m_rr.adjoint());
Eigen::MatrixXcd L = m_rr.llt().matrixL();
C = L.adjoint();
Cinv = C.inverse();
MulMatrix(Q,Cinv,R);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Call one of several implementations
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -119,14 +129,20 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
if ( CGtype == BlockCGrQ ) {
BlockCGrQsolve(Linop,Src,Psi);
} else if (CGtype == BlockCG ) {
BlockCGsolve(Linop,Src,Psi);
} else if (CGtype == CGmultiRHS ) {
CGmultiRHSsolve(Linop,Src,Psi);
} else {
assert(0);
}
}
// Vector-of-fields entry point: only the BlockCGrQVec algorithm is supported here;
// any other CGtype trips the assertion.
virtual void operator()(LinearOperatorBase<Field> &Linop, const std::vector<Field> &Src, std::vector<Field> &Psi)
{
  if ( CGtype != BlockCGrQVec ) {
    assert(0);
    return;
  }
  BlockCGrQsolveVec(Linop,Src,Psi);
}
////////////////////////////////////////////////////////////////////////////
// BlockCGrQ implementation:
@ -139,7 +155,8 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
Nblock = B._grid->_fdimensions[Orthog];
/* FAKE */
Nblock=8;
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
X.checkerboard = B.checkerboard;
@ -202,15 +219,10 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl;
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
Linop.HermOp(X, AD);
tmp = B - AD;
//std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl;
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
//std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl;
//std::cout << GridLogMessage << " m_rr " << m_rr<<std::endl;
//std::cout << GridLogMessage << " m_C " << m_C<<std::endl;
//std::cout << GridLogMessage << " m_Cinv " << m_Cinv<<std::endl;
D=Q;
std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;
@ -232,14 +244,12 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
MatrixTimer.Start();
Linop.HermOp(D, Z);
MatrixTimer.Stop();
//std::cout << GridLogMessage << " norm2 Z " <<norm2(Z)<<std::endl;
//4. M = [D^dag Z]^{-1}
sliceInnerTimer.Start();
sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
sliceInnerTimer.Stop();
m_M = m_DZ.inverse();
//std::cout << GridLogMessage << " m_DZ " <<m_DZ<<std::endl;
//5. X = X + D MC
m_tmp = m_M * m_C;
@ -257,6 +267,7 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
//7. D = Q + D S^dag
m_tmp = m_S.adjoint();
sliceMaddTimer.Start();
sliceMaddMatrix(D,m_tmp,D,Q,Orthog);
sliceMaddTimer.Stop();
@ -317,152 +328,6 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
IterationsToComplete = k;
}
//////////////////////////////////////////////////////////////////////////
// Block conjugate gradient; Original O'Leary Dimension zero should be the block direction
//////////////////////////////////////////////////////////////////////////
void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
Nblock = Src._grid->_fdimensions[Orthog];
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
Psi.checkerboard = Src.checkerboard;
conformable(Psi, Src);
Field P(Src);
Field AP(Src);
Field R(Src);
Eigen::MatrixXcd m_pAp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_pAp_inv= Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_rr_inv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_alpha = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_beta = Eigen::MatrixXcd::Zero(Nblock,Nblock);
// Initial residual computation & set up
std::vector<RealD> residuals(Nblock);
std::vector<RealD> ssq(Nblock);
sliceNorm(ssq,Src,Orthog);
RealD sssum=0;
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
sliceNorm(residuals,Src,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
sliceNorm(residuals,Psi,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
// Initial search dir is guess
Linop.HermOp(Psi, AP);
/************************************************************************
* Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980)
************************************************************************
* O'Leary : R = B - A X
* O'Leary : P = M R ; preconditioner M = 1
* O'Leary : alpha = PAP^{-1} RMR
* O'Leary : beta = RMR^{-1}_old RMR_new
* O'Leary : X=X+Palpha
* O'Leary : R_new=R_old-AP alpha
* O'Leary : P=MR_new+P beta
*/
R = Src - AP;
P = R;
sliceInnerProductMatrix(m_rr,R,R,Orthog);
GridStopWatch sliceInnerTimer;
GridStopWatch sliceMaddTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
RealD rrsum=0;
for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b));
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
<<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
MatrixTimer.Start();
Linop.HermOp(P, AP);
MatrixTimer.Stop();
// Alpha
sliceInnerTimer.Start();
sliceInnerProductMatrix(m_pAp,P,AP,Orthog);
sliceInnerTimer.Stop();
m_pAp_inv = m_pAp.inverse();
m_alpha = m_pAp_inv * m_rr ;
// Psi, R update
sliceMaddTimer.Start();
sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog); // add alpha * P to psi
sliceMaddMatrix(R ,m_alpha,AP, R,Orthog,-1.0);// sub alpha * AP to resid
sliceMaddTimer.Stop();
// Beta
m_rr_inv = m_rr.inverse();
sliceInnerTimer.Start();
sliceInnerProductMatrix(m_rr,R,R,Orthog);
sliceInnerTimer.Stop();
m_beta = m_rr_inv *m_rr;
// Search update
sliceMaddTimer.Start();
sliceMaddMatrix(AP,m_beta,P,R,Orthog);
sliceMaddTimer.Stop();
P= AP;
/*********************
* convergence monitor
*********************
*/
RealD max_resid=0;
RealD rr;
for(int b=0;b<Nblock;b++){
rr = real(m_rr(b,b))/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}
if ( max_resid < Tolerance*Tolerance ) {
SolverTimer.Stop();
std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
Linop.HermOp(Psi, AP);
AP = AP-Src;
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
//////////////////////////////////////////////////////////////////////////
// multiRHS conjugate gradient. Dimension zero should be the block direction
// Use this for spread out across nodes
//////////////////////////////////////////////////////////////////////////
@ -600,6 +465,233 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &
IterationsToComplete = k;
}
// Fill m with the Nblock x Nblock table of pairwise inner products:
// m(row,col) = innerProduct(X[row],Y[col]).
void InnerProductMatrix(Eigen::MatrixXcd &m , const std::vector<Field> &X, const std::vector<Field> &Y){
  for(int row=0; row<Nblock; row++){
    const Field &lhs = X[row];
    for(int col=0; col<Nblock; col++){
      m(row,col) = innerProduct(lhs,Y[col]);
    }
  }
}
void MaddMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X,const std::vector<Field> &Y,RealD scale=1.0){
// Should make this cache friendly with site outermost, parallel_for
// Deal with case AP aliases with either Y or X
std::vector<Field> tmp(Nblock,X[0]);
for(int b=0;b<Nblock;b++){
tmp[b] = Y[b];
for(int bp=0;bp<Nblock;bp++) {
tmp[b] = tmp[b] + (scale*m(bp,b))*X[bp];
}
}
for(int b=0;b<Nblock;b++){
AP[b] = tmp[b];
}
}
void MulMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X){
// Should make this cache friendly with site outermost, parallel_for
for(int b=0;b<Nblock;b++){
AP[b] = zero;
for(int bp=0;bp<Nblock;bp++) {
AP[b] += (m(bp,b))*X[bp];
}
}
}
// Sum of norm2 over the first Nblock entries of P.
double normv(const std::vector<Field> &P){
  double total = 0.0;
  int idx = 0;
  while(idx < Nblock){
    total += norm2(P[idx]);
    ++idx;
  }
  return total;
}
////////////////////////////////////////////////////////////////////////////
// BlockCGrQvec implementation:
//--------------------------
// X is guess/Solution
// B is RHS
// Solve A X_i = B_i ; i refers to Nblock index
////////////////////////////////////////////////////////////////////////////
// Block conjugate gradient (rQ variant) operating on vectors of fields.
//
//  Linop : operator whose HermOp is applied to every block member
//  B     : right hand sides, one Field per block; B.size() defines Nblock
//  X     : initial guesses on entry, overwritten with the solutions
//
// Sets the member Nblock, and records the iteration count in
// IterationsToComplete. Convergence is declared when, for every block b,
// (C^dag C)(b,b) / |B[b]|^2 < Tolerance^2. If MaxIterations is exhausted the
// routine asserts when ErrorOnNoConverge is set, otherwise it returns with
// whatever X has been accumulated so far.
void BlockCGrQsolveVec(LinearOperatorBase<Field> &Linop, const std::vector<Field> &B, std::vector<Field> &X)
{
Nblock = B.size();
assert(Nblock == X.size());
std::cout<<GridLogMessage<<" Block Conjugate Gradient Vec rQ : Nblock "<<Nblock<<std::endl;
// Every solution field takes the checkerboard of its source and must be
// conformable with both its source and the first solution field.
for(int b=0;b<Nblock;b++){
X[b].checkerboard = B[b].checkerboard;
conformable(X[b], B[b]);
conformable(X[b], X[0]);
}
// Fake is only a template used to copy-construct the work vectors below
Field Fake(B[0]);
std::vector<Field> tmp(Nblock,Fake);
std::vector<Field> Q(Nblock,Fake);
std::vector<Field> D(Nblock,Fake);
std::vector<Field> Z(Nblock,Fake);
std::vector<Field> AD(Nblock,Fake);
// Small dense Nblock x Nblock matrices used by the block recurrences
Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
// Initial residual computation & set up
// ssq[b] = |B[b]|^2 normalises the per-block residual; sssum is their total.
// `residuals` is only used here, to check that neither B nor X contains NaNs.
std::vector<RealD> residuals(Nblock);
std::vector<RealD> ssq(Nblock);
RealD sssum=0;
for(int b=0;b<Nblock;b++){ ssq[b] = norm2(B[b]);}
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
for(int b=0;b<Nblock;b++){ residuals[b] = norm2(B[b]);}
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
for(int b=0;b<Nblock;b++){ residuals[b] = norm2(X[b]);}
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
/************************************************************************
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
************************************************************************
* Dimensions:
*
* X,B==(Nferm x Nblock)
* A==(Nferm x Nferm)
*
* Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
*
* QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
* for k:
* Z = AD
* M = [D^dag Z]^{-1}
* X = X + D MC
* QS = Q - ZM
* D = Q + D S^dag
* C = S C
*/
///////////////////////////////////////
// Initial block: initial search dir is guess
///////////////////////////////////////
std::cout << GridLogMessage<<"BlockCGrQvec algorithm initialisation " <<std::endl;
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
for(int b=0;b<Nblock;b++) {
Linop.HermOp(X[b], AD[b]);
tmp[b] = B[b] - AD[b];
}
// NOTE(review): ThinQRfact is defined outside this view; per the step comment
// above it presumably returns the factors Q and m_C of the residual block tmp.
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
for(int b=0;b<Nblock;b++) D[b]=Q[b];
std::cout << GridLogMessage<<"BlockCGrQ vec computed initial residual and QR fact " <<std::endl;
///////////////////////////////////////
// Timers
///////////////////////////////////////
GridStopWatch sliceInnerTimer;
GridStopWatch sliceMaddTimer;
GridStopWatch QRTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
//3. Z = AD
MatrixTimer.Start();
for(int b=0;b<Nblock;b++) Linop.HermOp(D[b], Z[b]);
MatrixTimer.Stop();
//4. M = [D^dag Z]^{-1}
sliceInnerTimer.Start();
InnerProductMatrix(m_DZ,D,Z);
sliceInnerTimer.Stop();
m_M = m_DZ.inverse();
//5. X = X + D MC
m_tmp = m_M * m_C;
sliceMaddTimer.Start();
MaddMatrix(X,m_tmp, D,X);
sliceMaddTimer.Stop();
//6. QS = Q - ZM
// The un-factorised block Q - ZM is formed in tmp, then factorised into Q and m_S
sliceMaddTimer.Start();
MaddMatrix(tmp,m_M,Z,Q,-1.0);
sliceMaddTimer.Stop();
QRTimer.Start();
ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
QRTimer.Stop();
//7. D = Q + D S^dag
m_tmp = m_S.adjoint();
sliceMaddTimer.Start();
MaddMatrix(D,m_tmp,D,Q);
sliceMaddTimer.Stop();
//8. C = S C
m_C = m_S*m_C;
/*********************
* convergence monitor
*********************
*/
// The diagonal of C^dag C is used as the squared residual of each block;
// max_resid tracks the worst block relative to its source norm.
m_rr = m_C.adjoint() * m_C;
RealD max_resid=0;
RealD rrsum=0;
RealD rr;
for(int b=0;b<Nblock;b++) {
rrsum+=real(m_rr(b,b));
rr = real(m_rr(b,b))/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}
std::cout << GridLogIterative << "\t Block Iteration "<<k<<" ave resid "<< sqrt(rrsum/sssum) << " max "<< sqrt(max_resid) <<std::endl;
if ( max_resid < Tolerance*Tolerance ) {
SolverTimer.Stop();
std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
// Recompute A X - B from scratch so the log shows the true residual,
// not only the recursively updated estimate.
for(int b=0;b<Nblock;b++) Linop.HermOp(X[b], AD[b]);
for(int b=0;b<Nblock;b++) AD[b] = AD[b]-B[b];
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(normv(AD)/normv(B)) <<std::endl;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
// Falling out of the loop means MaxIterations was exhausted without convergence
std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
};
}

View File

@ -0,0 +1,244 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
// Restarted GMRES solver for LinOp psi = src.
//
// As the warning printed by operator() states, this "communication avoiding"
// class currently does not differ from the regular GMRES implementation.
//
// Each restart cycle builds an Arnoldi basis of at most RestartLength vectors,
// keeps the Hessenberg matrix H triangular by means of Givens rotations (c, s)
// and tracks the residual estimate in gamma. The iterate psi is updated at the
// end of every cycle.
template<class Field>
class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<Field> {
 public:
  bool ErrorOnNoConverge; // Throw an assert when CAGMRES fails to converge,
                          // defaults to true

  RealD   Tolerance;      // Converged when |r|^2 <= Tolerance^2 * |src|^2

  Integer MaxIterations;
  Integer RestartLength;
  Integer MaxNumberOfRestarts;
  Integer IterationCount; // Number of iterations the CAGMRES took to finish,
                          // filled in upon completion

  GridStopWatch MatrixTimer;
  GridStopWatch LinalgTimer;
  GridStopWatch QrTimer;
  GridStopWatch CompSolutionTimer;

  Eigen::MatrixXcd H;     // H(iter, i): coefficient i produced by Arnoldi step iter

  std::vector<std::complex<double>> y;     // solution of the small triangular system
  std::vector<std::complex<double>> gamma; // rotated right hand side / residual estimate
  std::vector<std::complex<double>> c;     // Givens rotation coefficients
  std::vector<std::complex<double>> s;     // Givens rotation coefficients

  // tol            : relative residual target
  // maxit          : upper bound on the total number of Arnoldi steps
  // restart_length : number of Arnoldi steps per restart cycle
  // err_on_no_conv : assert if the solver fails to converge
  //
  // The initialisers are listed in declaration order, which is the order in
  // which they are actually executed.
  CommunicationAvoidingGeneralisedMinimalResidual(RealD   tol,
                                                  Integer maxit,
                                                  Integer restart_length,
                                                  bool    err_on_no_conv = true)
      : ErrorOnNoConverge(err_on_no_conv)
      , Tolerance(tol)
      , MaxIterations(maxit)
      , RestartLength(restart_length)
      , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
      , IterationCount(0)
      , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
      , y(RestartLength + 1, 0.)
      , gamma(RestartLength + 1, 0.)
      , c(RestartLength + 1, 0.)
      , s(RestartLength + 1, 0.) {}

  // Solve LinOp psi = src, using the content of psi on entry as initial guess.
  // Runs at most MaxNumberOfRestarts restart cycles; on convergence the true
  // residual is recomputed for the log. If no cycle reaches the target the
  // routine asserts when ErrorOnNoConverge is set.
  void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {

    std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular GMRES" << std::endl;

    psi.checkerboard = src.checkerboard;
    conformable(psi, src);

    RealD guess = norm2(psi);
    assert(std::isnan(guess) == 0);

    RealD cp;
    RealD ssq = norm2(src);
    RealD rsq = Tolerance * Tolerance * ssq; // absolute target for the squared residual

    Field r(src._grid);

    std::cout << std::setprecision(4) << std::scientific;
    std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
    std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: src " << ssq << std::endl;

    MatrixTimer.Reset();
    LinalgTimer.Reset();
    QrTimer.Reset();
    CompSolutionTimer.Reset();

    GridStopWatch SolverTimer;
    SolverTimer.Start();

    IterationCount = 0;

    for (int k=0; k<MaxNumberOfRestarts; k++) {

      cp = outerLoopBody(LinOp, src, psi, rsq);

      // Stopping condition
      if (cp <= rsq) {

        SolverTimer.Stop();

        // Recompute the residual from the final iterate for the log
        LinOp.Op(psi,r);
        axpy(r,-1.0,src,r);

        RealD srcnorm = sqrt(ssq);
        RealD resnorm = sqrt(norm2(r));
        RealD true_residual = resnorm / srcnorm;

        std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount
                  << " computed residual " << sqrt(cp / ssq)
                  << " true residual " << true_residual
                  << " target " << Tolerance << std::endl;

        std::cout << GridLogMessage << "CAGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "CAGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "CAGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "CAGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "CAGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
        return;
      }
    }

    std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual did NOT converge" << std::endl;

    if (ErrorOnNoConverge)
      assert(0);
  }

  // One restart cycle. Computes r = src - LinOp psi, runs up to RestartLength
  // Arnoldi steps and adds the resulting correction to psi.
  // Returns the squared residual norm (estimate) reached by the cycle.
  RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {

    RealD cp = 0;

    Field w(src._grid);
    Field r(src._grid);

    MatrixTimer.Start();
    LinOp.Op(psi, w);
    MatrixTimer.Stop();

    LinalgTimer.Start();
    r = src - w;
    cp = norm2(r);

    // The iterate already satisfies the target (e.g. an exact initial guess).
    // Returning here also avoids dividing by a zero residual norm below, which
    // would fill the Krylov basis with NaNs and make the solver report failure.
    if (cp <= rsq) {
      LinalgTimer.Stop();
      return cp;
    }

    // this should probably be made a class member so that it is only allocated once, not in every restart
    std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;

    gamma[0] = sqrt(cp);

    v[0] = (1. / gamma[0]) * r;
    LinalgTimer.Stop();

    for (int i=0; i<RestartLength; i++) {

      IterationCount++;

      arnoldiStep(LinOp, v, w, i);

      qrUpdate(i);

      // |gamma[i+1]|^2 is the squared residual estimate after this step
      cp = std::norm(gamma[i+1]);

      std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount
                << " residual " << cp << " target " << rsq << std::endl;

      // Update psi when the cycle is full, the iteration budget is spent, or the target is met
      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {

        computeSolution(v, psi, i);

        return cp;
      }
    }

    assert(0); // Never reached
    return cp;
  }

  // Arnoldi step number iter: w = LinOp v[iter] is orthogonalised against
  // v[0..iter] (coefficients stored in H(iter, 0..iter)), its remaining norm is
  // stored in H(iter, iter+1) and the normalised vector becomes v[iter+1].
  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) {

    MatrixTimer.Start();
    LinOp.Op(v[iter], w);
    MatrixTimer.Stop();

    LinalgTimer.Start();
    for (int i = 0; i <= iter; ++i) {
      H(iter, i) = innerProduct(v[i], w);
      w = w - H(iter, i) * v[i];
    }

    H(iter, iter + 1) = sqrt(norm2(w));
    v[iter + 1] = (1. / H(iter, iter + 1)) * w;
    LinalgTimer.Stop();
  }

  // Applies the stored Givens rotations 0..iter-1 to the coefficients produced
  // by Arnoldi step iter, then builds and applies a new rotation that zeroes
  // H(iter, iter+1); gamma is rotated along.
  void qrUpdate(int iter) {

    QrTimer.Start();
    for (int i = 0; i < iter ; ++i) {
      auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
      H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
      H(iter, i + 1) = tmp;
    }

    // Compute new Givens Rotation
    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
    c[iter] = H(iter, iter) / nu;
    s[iter] = H(iter, iter + 1) / nu;

    // Apply new Givens rotation
    H(iter, iter) = nu;
    H(iter, iter + 1) = 0.;

    gamma[iter + 1] = -s[iter] * gamma[iter];
    gamma[iter] = std::conj(c[iter]) * gamma[iter];
    QrTimer.Stop();
  }

  // Back substitution for y using H and gamma (entries 0..iter), followed by
  // psi += sum_i v[i] * y[i].
  void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {

    CompSolutionTimer.Start();
    for (int i = iter; i >= 0; i--) {
      y[i] = gamma[i];
      for (int k = i + 1; k <= iter; k++)
        y[i] = y[i] - H(k, i) * y[k];
      y[i] = y[i] / H(i, i);
    }

    for (int i = 0; i <= iter; i++)
      psi = psi + v[i] * y[i];
    CompSolutionTimer.Stop();
  }
};
}
#endif

View File

@ -89,6 +89,8 @@ class ConjugateGradient : public OperatorFunction<Field> {
// Check if guess is really REALLY good :)
if (cp <= rsq) {
std::cout << GridLogMessage << "ConjugateGradient guess is converged already " << std::endl;
IterationsToComplete = 0;
return;
}
@ -104,7 +106,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations*1000; k++) {
for (k = 1; k <= MaxIterations; k++) {
c = cp;
MatrixTimer.Start();
@ -133,7 +135,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
LinalgTimer.Stop();
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
<< " residual " << cp << " target " << rsq << std::endl;
<< " residual^2 " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
// Stopping condition
if (cp <= rsq) {
@ -165,8 +167,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
return;
}
}
std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
<< std::endl;
std::cout << GridLogMessage << "ConjugateGradient did NOT converge "<<k<<" / "<< MaxIterations<< std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;

View File

@ -30,8 +30,11 @@ Author: Christopher Kelly <ckelly@phys.columbia.edu>
namespace Grid {
//Mixed precision restarted defect correction CG
template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
template<class FieldD,class FieldF,
typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
public:
RealD Tolerance;
@ -50,7 +53,12 @@ namespace Grid {
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
LinearFunction<FieldF> *guesser;
MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) :
MixedPrecisionConjugateGradient(RealD tol,
Integer maxinnerit,
Integer maxouterit,
GridBase* _sp_grid,
LinearOperatorBase<FieldF> &_Linop_f,
LinearOperatorBase<FieldD> &_Linop_d) :
Linop_f(_Linop_f), Linop_d(_Linop_d),
Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
OuterLoopNormMult(100.), guesser(NULL){ };
@ -149,6 +157,8 @@ namespace Grid {
}
};
}
#endif

View File

@ -35,7 +35,11 @@ class ZeroGuesser: public LinearFunction<Field> {
public:
virtual void operator()(const Field &src, Field &guess) { guess = zero; };
};
// Guesser that leaves `guess` exactly as the caller passed it in.
template<class Field>
class DoNothingGuesser: public LinearFunction<Field> {
public:
  virtual void operator()(const Field &src, Field &guess) {
    // intentionally empty: neither src nor guess is touched
  }
};
template<class Field>
class SourceGuesser: public LinearFunction<Field> {
public:

View File

@ -0,0 +1,256 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_FLEXIBLE_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_FLEXIBLE_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
// Restarted flexible (right preconditioned) GMRES solver for LinOp psi = src.
//
// As the warning printed by operator() states, this "communication avoiding"
// class currently does not differ from the regular FGMRES implementation.
//
// Each restart cycle builds an Arnoldi basis v of at most RestartLength
// vectors together with the preconditioned vectors z[i] = Preconditioner(v[i]),
// keeps the Hessenberg matrix H triangular by means of Givens rotations (c, s)
// and tracks the residual estimate in gamma. The iterate psi is updated from
// the z vectors at the end of every cycle.
template<class Field>
class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<Field> {
 public:
  bool ErrorOnNoConverge; // Throw an assert when FCAGMRES fails to converge,
                          // defaults to true

  RealD   Tolerance;      // Converged when |r|^2 <= Tolerance^2 * |src|^2

  Integer MaxIterations;
  Integer RestartLength;
  Integer MaxNumberOfRestarts;
  Integer IterationCount; // Number of iterations the FCAGMRES took to finish,
                          // filled in upon completion

  GridStopWatch MatrixTimer;
  GridStopWatch PrecTimer;
  GridStopWatch LinalgTimer;
  GridStopWatch QrTimer;
  GridStopWatch CompSolutionTimer;

  Eigen::MatrixXcd H;     // H(iter, i): coefficient i produced by Arnoldi step iter

  std::vector<std::complex<double>> y;     // solution of the small triangular system
  std::vector<std::complex<double>> gamma; // rotated right hand side / residual estimate
  std::vector<std::complex<double>> c;     // Givens rotation coefficients
  std::vector<std::complex<double>> s;     // Givens rotation coefficients

  LinearFunction<Field> &Preconditioner;   // held by reference: must outlive the solver

  // tol            : relative residual target
  // maxit          : upper bound on the total number of Arnoldi steps
  // Prec           : preconditioner applied to every new basis vector
  // restart_length : number of Arnoldi steps per restart cycle
  // err_on_no_conv : assert if the solver fails to converge
  //
  // The initialisers are listed in declaration order, which is the order in
  // which they are actually executed.
  FlexibleCommunicationAvoidingGeneralisedMinimalResidual(RealD   tol,
                                                          Integer maxit,
                                                          LinearFunction<Field> &Prec,
                                                          Integer restart_length,
                                                          bool    err_on_no_conv = true)
      : ErrorOnNoConverge(err_on_no_conv)
      , Tolerance(tol)
      , MaxIterations(maxit)
      , RestartLength(restart_length)
      , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
      , IterationCount(0)
      , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
      , y(RestartLength + 1, 0.)
      , gamma(RestartLength + 1, 0.)
      , c(RestartLength + 1, 0.)
      , s(RestartLength + 1, 0.)
      , Preconditioner(Prec) {}

  // Solve LinOp psi = src, using the content of psi on entry as initial guess.
  // Runs at most MaxNumberOfRestarts restart cycles; on convergence the true
  // residual is recomputed for the log. If no cycle reaches the target the
  // routine asserts when ErrorOnNoConverge is set.
  void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {

    std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular FGMRES" << std::endl;

    psi.checkerboard = src.checkerboard;
    conformable(psi, src);

    RealD guess = norm2(psi);
    assert(std::isnan(guess) == 0);

    RealD cp;
    RealD ssq = norm2(src);
    RealD rsq = Tolerance * Tolerance * ssq; // absolute target for the squared residual

    Field r(src._grid);

    std::cout << std::setprecision(4) << std::scientific;
    std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
    std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: src " << ssq << std::endl;

    PrecTimer.Reset();
    MatrixTimer.Reset();
    LinalgTimer.Reset();
    QrTimer.Reset();
    CompSolutionTimer.Reset();

    GridStopWatch SolverTimer;
    SolverTimer.Start();

    IterationCount = 0;

    for (int k=0; k<MaxNumberOfRestarts; k++) {

      cp = outerLoopBody(LinOp, src, psi, rsq);

      // Stopping condition
      if (cp <= rsq) {

        SolverTimer.Stop();

        // Recompute the residual from the final iterate for the log
        LinOp.Op(psi,r);
        axpy(r,-1.0,src,r);

        RealD srcnorm = sqrt(ssq);
        RealD resnorm = sqrt(norm2(r));
        RealD true_residual = resnorm / srcnorm;

        std::cout << GridLogMessage << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount
                  << " computed residual " << sqrt(cp / ssq)
                  << " true residual " << true_residual
                  << " target " << Tolerance << std::endl;

        std::cout << GridLogMessage << "FCAGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "FCAGMRES Time elapsed: Precon " << PrecTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "FCAGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "FCAGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "FCAGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "FCAGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
        return;
      }
    }

    std::cout << GridLogMessage << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual did NOT converge" << std::endl;

    if (ErrorOnNoConverge)
      assert(0);
  }

  // One restart cycle. Computes r = src - LinOp psi, runs up to RestartLength
  // preconditioned Arnoldi steps and adds the resulting correction to psi.
  // Returns the squared residual norm (estimate) reached by the cycle.
  RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {

    RealD cp = 0;

    Field w(src._grid);
    Field r(src._grid);

    MatrixTimer.Start();
    LinOp.Op(psi, w);
    MatrixTimer.Stop();

    LinalgTimer.Start();
    r = src - w;
    cp = norm2(r);

    // The iterate already satisfies the target (e.g. an exact initial guess).
    // Returning here also avoids dividing by a zero residual norm below, which
    // would fill the Krylov basis with NaNs and make the solver report failure.
    if (cp <= rsq) {
      LinalgTimer.Stop();
      return cp;
    }

    // these should probably be made class members so that they are only allocated once, not in every restart
    std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
    std::vector<Field> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;

    gamma[0] = sqrt(cp);

    v[0] = (1. / gamma[0]) * r;
    LinalgTimer.Stop();

    for (int i=0; i<RestartLength; i++) {

      IterationCount++;

      arnoldiStep(LinOp, v, z, w, i);

      qrUpdate(i);

      // |gamma[i+1]|^2 is the squared residual estimate after this step
      cp = std::norm(gamma[i+1]);

      std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount
                << " residual " << cp << " target " << rsq << std::endl;

      // Update psi when the cycle is full, the iteration budget is spent, or the target is met
      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {

        computeSolution(z, psi, i);

        return cp;
      }
    }

    assert(0); // Never reached
    return cp;
  }

  // Arnoldi step number iter: z[iter] = Preconditioner(v[iter]),
  // w = LinOp z[iter] is orthogonalised against v[0..iter] (coefficients stored
  // in H(iter, 0..iter)), its remaining norm is stored in H(iter, iter+1) and
  // the normalised vector becomes v[iter+1].
  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) {

    PrecTimer.Start();
    Preconditioner(v[iter], z[iter]);
    PrecTimer.Stop();

    MatrixTimer.Start();
    LinOp.Op(z[iter], w);
    MatrixTimer.Stop();

    LinalgTimer.Start();
    for (int i = 0; i <= iter; ++i) {
      H(iter, i) = innerProduct(v[i], w);
      w = w - H(iter, i) * v[i];
    }

    H(iter, iter + 1) = sqrt(norm2(w));
    v[iter + 1] = (1. / H(iter, iter + 1)) * w;
    LinalgTimer.Stop();
  }

  // Applies the stored Givens rotations 0..iter-1 to the coefficients produced
  // by Arnoldi step iter, then builds and applies a new rotation that zeroes
  // H(iter, iter+1); gamma is rotated along.
  void qrUpdate(int iter) {

    QrTimer.Start();
    for (int i = 0; i < iter ; ++i) {
      auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
      H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
      H(iter, i + 1) = tmp;
    }

    // Compute new Givens Rotation
    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
    c[iter] = H(iter, iter) / nu;
    s[iter] = H(iter, iter + 1) / nu;

    // Apply new Givens rotation
    H(iter, iter) = nu;
    H(iter, iter + 1) = 0.;

    gamma[iter + 1] = -s[iter] * gamma[iter];
    gamma[iter] = std::conj(c[iter]) * gamma[iter];
    QrTimer.Stop();
  }

  // Back substitution for y using H and gamma (entries 0..iter), followed by
  // psi += sum_i z[i] * y[i].
  void computeSolution(std::vector<Field> const &z, Field &psi, int iter) {

    CompSolutionTimer.Start();
    for (int i = iter; i >= 0; i--) {
      y[i] = gamma[i];
      for (int k = i + 1; k <= iter; k++)
        y[i] = y[i] - H(k, i) * y[k];
      y[i] = y[i] / H(i, i);
    }

    for (int i = 0; i <= iter; i++)
      psi = psi + z[i] * y[i];
    CompSolutionTimer.Stop();
  }
};
}
#endif

View File

@ -0,0 +1,254 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
// Restarted flexible (right preconditioned) GMRES solver for LinOp psi = src.
//
// Each restart cycle builds an Arnoldi basis v of at most RestartLength
// vectors together with the preconditioned vectors z[i] = Preconditioner(v[i]),
// keeps the Hessenberg matrix H triangular by means of Givens rotations (c, s)
// and tracks the residual estimate in gamma. The iterate psi is updated from
// the z vectors at the end of every cycle.
template<class Field>
class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
 public:
  bool ErrorOnNoConverge; // Throw an assert when FGMRES fails to converge,
                          // defaults to true

  RealD   Tolerance;      // Converged when |r|^2 <= Tolerance^2 * |src|^2

  Integer MaxIterations;
  Integer RestartLength;
  Integer MaxNumberOfRestarts;
  Integer IterationCount; // Number of iterations the FGMRES took to finish,
                          // filled in upon completion

  GridStopWatch MatrixTimer;
  GridStopWatch PrecTimer;
  GridStopWatch LinalgTimer;
  GridStopWatch QrTimer;
  GridStopWatch CompSolutionTimer;

  Eigen::MatrixXcd H;     // H(iter, i): coefficient i produced by Arnoldi step iter

  std::vector<std::complex<double>> y;     // solution of the small triangular system
  std::vector<std::complex<double>> gamma; // rotated right hand side / residual estimate
  std::vector<std::complex<double>> c;     // Givens rotation coefficients
  std::vector<std::complex<double>> s;     // Givens rotation coefficients

  LinearFunction<Field> &Preconditioner;   // held by reference: must outlive the solver

  // tol            : relative residual target
  // maxit          : upper bound on the total number of Arnoldi steps
  // Prec           : preconditioner applied to every new basis vector
  // restart_length : number of Arnoldi steps per restart cycle
  // err_on_no_conv : assert if the solver fails to converge
  //
  // The initialisers are listed in declaration order, which is the order in
  // which they are actually executed.
  FlexibleGeneralisedMinimalResidual(RealD   tol,
                                     Integer maxit,
                                     LinearFunction<Field> &Prec,
                                     Integer restart_length,
                                     bool    err_on_no_conv = true)
      : ErrorOnNoConverge(err_on_no_conv)
      , Tolerance(tol)
      , MaxIterations(maxit)
      , RestartLength(restart_length)
      , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
      , IterationCount(0)
      , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
      , y(RestartLength + 1, 0.)
      , gamma(RestartLength + 1, 0.)
      , c(RestartLength + 1, 0.)
      , s(RestartLength + 1, 0.)
      , Preconditioner(Prec) {}

  // Solve LinOp psi = src, using the content of psi on entry as initial guess.
  // Runs at most MaxNumberOfRestarts restart cycles; on convergence the true
  // residual is recomputed for the log. If no cycle reaches the target the
  // routine asserts when ErrorOnNoConverge is set.
  void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {

    psi.checkerboard = src.checkerboard;
    conformable(psi, src);

    RealD guess = norm2(psi);
    assert(std::isnan(guess) == 0);

    RealD cp;
    RealD ssq = norm2(src);
    RealD rsq = Tolerance * Tolerance * ssq; // absolute target for the squared residual

    Field r(src._grid);

    std::cout << std::setprecision(4) << std::scientific;
    std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: guess " << guess << std::endl;
    std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: src " << ssq << std::endl;

    PrecTimer.Reset();
    MatrixTimer.Reset();
    LinalgTimer.Reset();
    QrTimer.Reset();
    CompSolutionTimer.Reset();

    GridStopWatch SolverTimer;
    SolverTimer.Start();

    IterationCount = 0;

    for (int k=0; k<MaxNumberOfRestarts; k++) {

      cp = outerLoopBody(LinOp, src, psi, rsq);

      // Stopping condition
      if (cp <= rsq) {

        SolverTimer.Stop();

        // Recompute the residual from the final iterate for the log
        LinOp.Op(psi,r);
        axpy(r,-1.0,src,r);

        RealD srcnorm = sqrt(ssq);
        RealD resnorm = sqrt(norm2(r));
        RealD true_residual = resnorm / srcnorm;

        std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual: Converged on iteration " << IterationCount
                  << " computed residual " << sqrt(cp / ssq)
                  << " true residual " << true_residual
                  << " target " << Tolerance << std::endl;

        std::cout << GridLogMessage << "FGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "FGMRES Time elapsed: Precon " << PrecTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "FGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "FGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "FGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "FGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
        return;
      }
    }

    std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual did NOT converge" << std::endl;

    if (ErrorOnNoConverge)
      assert(0);
  }

  // One restart cycle. Computes r = src - LinOp psi, runs up to RestartLength
  // preconditioned Arnoldi steps and adds the resulting correction to psi.
  // Returns the squared residual norm (estimate) reached by the cycle.
  RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {

    RealD cp = 0;

    Field w(src._grid);
    Field r(src._grid);

    MatrixTimer.Start();
    LinOp.Op(psi, w);
    MatrixTimer.Stop();

    LinalgTimer.Start();
    r = src - w;
    cp = norm2(r);

    // The iterate already satisfies the target (e.g. an exact initial guess).
    // Returning here also avoids dividing by a zero residual norm below, which
    // would fill the Krylov basis with NaNs and make the solver report failure.
    if (cp <= rsq) {
      LinalgTimer.Stop();
      return cp;
    }

    // these should probably be made class members so that they are only allocated once, not in every restart
    std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
    std::vector<Field> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;

    gamma[0] = sqrt(cp);

    v[0] = (1. / gamma[0]) * r;
    LinalgTimer.Stop();

    for (int i=0; i<RestartLength; i++) {

      IterationCount++;

      arnoldiStep(LinOp, v, z, w, i);

      qrUpdate(i);

      // |gamma[i+1]|^2 is the squared residual estimate after this step
      cp = std::norm(gamma[i+1]);

      std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: Iteration " << IterationCount
                << " residual " << cp << " target " << rsq << std::endl;

      // Update psi when the cycle is full, the iteration budget is spent, or the target is met
      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {

        computeSolution(z, psi, i);

        return cp;
      }
    }

    assert(0); // Never reached
    return cp;
  }

  // Arnoldi step number iter: z[iter] = Preconditioner(v[iter]),
  // w = LinOp z[iter] is orthogonalised against v[0..iter] (coefficients stored
  // in H(iter, 0..iter)), its remaining norm is stored in H(iter, iter+1) and
  // the normalised vector becomes v[iter+1].
  void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) {

    PrecTimer.Start();
    Preconditioner(v[iter], z[iter]);
    PrecTimer.Stop();

    MatrixTimer.Start();
    LinOp.Op(z[iter], w);
    MatrixTimer.Stop();

    LinalgTimer.Start();
    for (int i = 0; i <= iter; ++i) {
      H(iter, i) = innerProduct(v[i], w);
      w = w - H(iter, i) * v[i];
    }

    H(iter, iter + 1) = sqrt(norm2(w));
    v[iter + 1] = (1. / H(iter, iter + 1)) * w;
    LinalgTimer.Stop();
  }

  // Applies the stored Givens rotations 0..iter-1 to the coefficients produced
  // by Arnoldi step iter, then builds and applies a new rotation that zeroes
  // H(iter, iter+1); gamma is rotated along.
  void qrUpdate(int iter) {

    QrTimer.Start();
    for (int i = 0; i < iter ; ++i) {
      auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
      H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
      H(iter, i + 1) = tmp;
    }

    // Compute new Givens Rotation
    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
    c[iter] = H(iter, iter) / nu;
    s[iter] = H(iter, iter + 1) / nu;

    // Apply new Givens rotation
    H(iter, iter) = nu;
    H(iter, iter + 1) = 0.;

    gamma[iter + 1] = -s[iter] * gamma[iter];
    gamma[iter] = std::conj(c[iter]) * gamma[iter];
    QrTimer.Stop();
  }

  // Back substitution for y using H and gamma (entries 0..iter), followed by
  // psi += sum_i z[i] * y[i].
  void computeSolution(std::vector<Field> const &z, Field &psi, int iter) {

    CompSolutionTimer.Start();
    for (int i = iter; i >= 0; i--) {
      y[i] = gamma[i];
      for (int k = i + 1; k <= iter; k++)
        y[i] = y[i] - H(k, i) * y[k];
      y[i] = y[i] / H(i, i);
    }

    for (int i = 0; i <= iter; i++)
      psi = psi + z[i] * y[i];
    CompSolutionTimer.Stop();
  }
};
}
#endif

View File

@ -0,0 +1,242 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/GeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
template<class Field>
class GeneralisedMinimalResidual : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; // Throw an assert when GMRES fails to converge,
// defaults to true
RealD Tolerance;
Integer MaxIterations;
Integer RestartLength;
Integer MaxNumberOfRestarts;
Integer IterationCount; // Number of iterations the GMRES took to finish,
// filled in upon completion
GridStopWatch MatrixTimer;
GridStopWatch LinalgTimer;
GridStopWatch QrTimer;
GridStopWatch CompSolutionTimer;
Eigen::MatrixXcd H;
std::vector<std::complex<double>> y;
std::vector<std::complex<double>> gamma;
std::vector<std::complex<double>> c;
std::vector<std::complex<double>> s;
// tol            : relative residual target
// maxit          : upper bound on the total number of iterations
// restart_length : number of iterations per restart cycle
// err_on_no_conv : assert if the solver fails to converge
//
// The initialisers are listed in member declaration order (ErrorOnNoConverge
// is declared first), which is the order in which they are actually executed.
// IterationCount is zeroed so that it never holds an indeterminate value
// before the first solve.
GeneralisedMinimalResidual(RealD tol,
                           Integer maxit,
                           Integer restart_length,
                           bool err_on_no_conv = true)
    : ErrorOnNoConverge(err_on_no_conv)
    , Tolerance(tol)
    , MaxIterations(maxit)
    , RestartLength(restart_length)
    , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
    , IterationCount(0)
    , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
    , y(RestartLength + 1, 0.)
    , gamma(RestartLength + 1, 0.)
    , c(RestartLength + 1, 0.)
    , s(RestartLength + 1, 0.) {}
// Solve LinOp psi = src with restarted GMRES, using the content of psi on
// entry as initial guess. Runs at most MaxNumberOfRestarts restart cycles and
// returns as soon as one of them reaches the squared residual target; the true
// residual is then recomputed for the log. If no cycle converges the routine
// asserts when ErrorOnNoConverge is set.
void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {

  psi.checkerboard = src.checkerboard;
  conformable(psi, src);

  const RealD guessNorm2 = norm2(psi);
  assert(std::isnan(guessNorm2) == 0);

  const RealD srcNorm2 = norm2(src);
  const RealD target   = Tolerance * Tolerance * srcNorm2; // absolute squared residual target

  Field resid(src._grid);

  std::cout << std::setprecision(4) << std::scientific;
  std::cout << GridLogIterative << "GeneralisedMinimalResidual: guess " << guessNorm2 << std::endl;
  std::cout << GridLogIterative << "GeneralisedMinimalResidual: src " << srcNorm2 << std::endl;

  MatrixTimer.Reset();
  LinalgTimer.Reset();
  QrTimer.Reset();
  CompSolutionTimer.Reset();

  GridStopWatch SolverTimer;
  SolverTimer.Start();

  IterationCount = 0;

  for (int restart = 0; restart < MaxNumberOfRestarts; ++restart) {

    const RealD reached = outerLoopBody(LinOp, src, psi, target);

    // Not there yet (this also covers a NaN result): start the next cycle
    if (!(reached <= target)) continue;

    SolverTimer.Stop();

    // Recompute the residual from the final iterate for the log
    LinOp.Op(psi, resid);
    axpy(resid, -1.0, src, resid);

    const RealD srcnorm = sqrt(srcNorm2);
    const RealD resnorm = sqrt(norm2(resid));
    const RealD true_residual = resnorm / srcnorm;

    std::cout << GridLogMessage << "GeneralisedMinimalResidual: Converged on iteration " << IterationCount
              << " computed residual " << sqrt(reached / srcNorm2)
              << " true residual " << true_residual
              << " target " << Tolerance << std::endl;

    std::cout << GridLogMessage << "GMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
    std::cout << GridLogMessage << "GMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
    std::cout << GridLogMessage << "GMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
    std::cout << GridLogMessage << "GMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
    std::cout << GridLogMessage << "GMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
    return;
  }

  std::cout << GridLogMessage << "GeneralisedMinimalResidual did NOT converge" << std::endl;

  if (ErrorOnNoConverge) {
    assert(0);
  }
}
RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
RealD cp = 0;
Field w(src._grid);
Field r(src._grid);
// this should probably be made a class member so that it is only allocated once, not in every restart
std::vector<Field> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
MatrixTimer.Start();
LinOp.Op(psi, w);
MatrixTimer.Stop();
LinalgTimer.Start();
r = src - w;
gamma[0] = sqrt(norm2(r));
v[0] = (1. / gamma[0]) * r;
LinalgTimer.Stop();
for (int i=0; i<RestartLength; i++) {
IterationCount++;
arnoldiStep(LinOp, v, w, i);
qrUpdate(i);
cp = std::norm(gamma[i+1]);
std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration " << IterationCount
<< " residual " << cp << " target " << rsq << std::endl;
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
computeSolution(v, psi, i);
return cp;
}
}
assert(0); // Never reached
return cp;
}
void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) {
MatrixTimer.Start();
LinOp.Op(v[iter], w);
MatrixTimer.Stop();
LinalgTimer.Start();
for (int i = 0; i <= iter; ++i) {
H(iter, i) = innerProduct(v[i], w);
w = w - H(iter, i) * v[i];
}
H(iter, iter + 1) = sqrt(norm2(w));
v[iter + 1] = (1. / H(iter, iter + 1)) * w;
LinalgTimer.Stop();
}
void qrUpdate(int iter) {
QrTimer.Start();
for (int i = 0; i < iter ; ++i) {
auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
H(iter, i + 1) = tmp;
}
// Compute new Givens Rotation
ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
c[iter] = H(iter, iter) / nu;
s[iter] = H(iter, iter + 1) / nu;
// Apply new Givens rotation
H(iter, iter) = nu;
H(iter, iter + 1) = 0.;
gamma[iter + 1] = -s[iter] * gamma[iter];
gamma[iter] = std::conj(c[iter]) * gamma[iter];
QrTimer.Stop();
}
void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {
CompSolutionTimer.Start();
for (int i = iter; i >= 0; i--) {
y[i] = gamma[i];
for (int k = i + 1; k <= iter; k++)
y[i] = y[i] - H(k, i) * y[k];
y[i] = y[i] / H(i, i);
}
for (int i = 0; i <= iter; i++)
psi = psi + v[i] * y[i];
CompSolutionTimer.Stop();
}
};
}
#endif

View File

@ -0,0 +1,156 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/MinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MINIMAL_RESIDUAL_H
#define GRID_MINIMAL_RESIDUAL_H
namespace Grid {
/// Minimal residual (MR) iteration with optional over-relaxation.
///
/// Solves Linop * psi = src starting from the guess passed in psi.  Every
/// step applies Linop to the current residual r, takes the step length
/// a = overRelaxParam * <Mr, r> / |Mr|^2 and updates psi and r with it.
/// The iteration stops once |r|^2 <= Tolerance^2 * |src|^2.
template<class Field> class MinimalResidual : public OperatorFunction<Field> {
 public:
  bool ErrorOnNoConverge; // throw an assert when the MR fails to converge.
                          // Defaults true.
  RealD   Tolerance;
  Integer MaxIterations;
  RealD   overRelaxParam;
  Integer IterationsToComplete; // Number of iterations the MR took to finish.
                                // Filled in upon completion

  /// @param tol             relative residual target
  /// @param maxit           maximum number of iterations
  /// @param ovrelparam      factor multiplying the step length
  /// @param err_on_no_conv  assert if the solver does not converge
  ///
  /// The initialisers are listed in member declaration order, which is the
  /// order in which they are actually executed.
  MinimalResidual(RealD tol, Integer maxit, Real ovrelparam = 1.0, bool err_on_no_conv = true)
    : ErrorOnNoConverge(err_on_no_conv)
    , Tolerance(tol)
    , MaxIterations(maxit)
    , overRelaxParam(ovrelparam)
    , IterationsToComplete(0) {};

  /// Run the solver.  psi is used as the initial guess and is overwritten
  /// with the solution.  IterationsToComplete is set on every exit path.
  void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {

    psi.checkerboard = src.checkerboard;
    conformable(psi, src);

    // a, c and d are only assigned inside the iteration loop; they must not
    // be read before that.
    Complex a, c;
    Real    d;

    Field Mr(src);
    Field r(src);

    // Initial residual computation & set up
    RealD guess = norm2(psi);
    assert(std::isnan(guess) == 0);

    RealD ssq = norm2(src);
    RealD rsq = Tolerance * Tolerance * ssq;

    Linop.Op(psi, Mr);

    r = src - Mr;

    RealD cp = norm2(r);

    std::cout << std::setprecision(4) << std::scientific;
    std::cout << GridLogIterative << "MinimalResidual: guess " << guess << std::endl;
    std::cout << GridLogIterative << "MinimalResidual: src " << ssq << std::endl;
    std::cout << GridLogIterative << "MinimalResidual: cp,r " << cp << std::endl;

    // The guess already satisfies the target: no iterations were needed
    if (cp <= rsq) {
      IterationsToComplete = 0;
      return;
    }

    std::cout << GridLogIterative << "MinimalResidual: k=0 residual " << cp << " target " << rsq << std::endl;

    GridStopWatch LinalgTimer;
    GridStopWatch MatrixTimer;
    GridStopWatch SolverTimer;

    SolverTimer.Start();
    int k;
    for (k = 1; k <= MaxIterations; k++) {

      MatrixTimer.Start();
      Linop.Op(r, Mr);
      MatrixTimer.Stop();

      LinalgTimer.Start();

      c = innerProduct(Mr, r);

      d = norm2(Mr);

      a = c / d;

      a = a * overRelaxParam;

      psi = psi + r * a;

      r = r - Mr * a;

      cp = norm2(r);

      LinalgTimer.Stop();

      std::cout << GridLogIterative << "MinimalResidual: Iteration " << k
                << " residual " << cp << " target " << rsq << std::endl;
      std::cout << GridLogDebug << "a = " << a << " c = " << c << " d = " << d << std::endl;

      // Stopping condition
      if (cp <= rsq) {
        SolverTimer.Stop();

        // Recompute the residual from scratch to report the true value
        Linop.Op(psi, Mr);
        r = src - Mr;

        RealD srcnorm       = sqrt(ssq);
        RealD resnorm       = sqrt(norm2(r));
        RealD true_residual = resnorm / srcnorm;

        std::cout << GridLogMessage << "MinimalResidual Converged on iteration " << k
                  << " computed residual " << sqrt(cp / ssq)
                  << " true residual " << true_residual
                  << " target " << Tolerance << std::endl;

        std::cout << GridLogMessage << "MR Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "MR Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "MR Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;

        // The accumulated residual may drift from the true one; only fail
        // when the true residual is off by a large factor.
        if (ErrorOnNoConverge)
          assert(true_residual / Tolerance < 10000.0);

        IterationsToComplete = k;

        return;
      }
    }

    std::cout << GridLogMessage << "MinimalResidual did NOT converge"
              << std::endl;

    if (ErrorOnNoConverge)
      assert(0);

    IterationsToComplete = k;
  }
};
} // namespace Grid
#endif

View File

@ -0,0 +1,273 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h
Copyright (C) 2015
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MIXED_PRECISION_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
#define GRID_MIXED_PRECISION_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
namespace Grid {
/// Restarted flexible GMRES in double precision with a single-precision
/// preconditioner.
///
/// Solves LinOp * psi = src starting from the guess passed in psi.  In each
/// Arnoldi step the current basis vector is converted to FieldF, handed to
/// Preconditioner, converted back and kept in z; LinOp is then applied to
/// that preconditioned vector.  The solution update is assembled from the z
/// vectors.  Convergence is declared once the squared residual estimate
/// drops to Tolerance^2 * |src|^2 or below.
template<class FieldD, class FieldF, typename std::enable_if<getPrecision<FieldD>::value == 2, int>::type = 0, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction<FieldD> {
 public:
  bool ErrorOnNoConverge; // Throw an assert when MPFGMRES fails to converge,
                          // defaults to true

  RealD   Tolerance;           // relative target: converged when |r|^2 <= Tolerance^2 * |src|^2
  Integer MaxIterations;       // upper bound on the total number of Arnoldi steps
  Integer RestartLength;       // number of Arnoldi steps per restart cycle
  Integer MaxNumberOfRestarts; // MaxIterations / RestartLength, rounded up
  Integer IterationCount; // Number of iterations the MPFGMRES took to finish,
                          // filled in upon completion

  GridStopWatch MatrixTimer;
  GridStopWatch PrecTimer;
  GridStopWatch LinalgTimer;
  GridStopWatch QrTimer;
  GridStopWatch CompSolutionTimer;
  GridStopWatch ChangePrecTimer;

  // H(j, i) holds the coefficient of v[i] generated in Arnoldi step j, i.e.
  // the row index is the iteration and the column index the basis vector.
  Eigen::MatrixXcd H;

  std::vector<std::complex<double>> y;     // solution of the triangular system
  std::vector<std::complex<double>> gamma; // rotated right-hand side
  std::vector<std::complex<double>> c;     // Givens rotation coefficients
  std::vector<std::complex<double>> s;     // Givens rotation coefficients

  GridBase* SinglePrecGrid; // grid on which the FieldF temporaries are created

  LinearFunction<FieldF> &Preconditioner; // held by reference, not owned

  /// @param tol             relative residual target
  /// @param maxit           maximum total number of iterations
  /// @param sp_grid         grid used for the single-precision temporaries
  /// @param Prec            preconditioner, must outlive this object
  /// @param restart_length  iterations per restart cycle (must be non-zero,
  ///                        it is used as a divisor below)
  /// @param err_on_no_conv  assert if the solver does not converge
  ///
  /// The initialisers are listed in member declaration order, which is the
  /// order in which they are actually executed.
  MixedPrecisionFlexibleGeneralisedMinimalResidual(RealD   tol,
                                                   Integer maxit,
                                                   GridBase * sp_grid,
                                                   LinearFunction<FieldF> &Prec,
                                                   Integer restart_length,
                                                   bool    err_on_no_conv = true)
    : ErrorOnNoConverge(err_on_no_conv)
    , Tolerance(tol)
    , MaxIterations(maxit)
    , RestartLength(restart_length)
    , MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
    , IterationCount(0)
    , H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
    , y(RestartLength + 1, 0.)
    , gamma(RestartLength + 1, 0.)
    , c(RestartLength + 1, 0.)
    , s(RestartLength + 1, 0.)
    , SinglePrecGrid(sp_grid)
    , Preconditioner(Prec) {};

  /// Run the solver.  psi is used as the initial guess and is overwritten
  /// with the solution.  Timings are printed on convergence; on failure a
  /// message is printed and, if ErrorOnNoConverge is set, assert(0) fires.
  void operator()(LinearOperatorBase<FieldD> &LinOp, const FieldD &src, FieldD &psi) {

    psi.checkerboard = src.checkerboard;
    conformable(psi, src);

    RealD guess = norm2(psi);
    assert(std::isnan(guess) == 0);

    RealD cp;
    RealD ssq = norm2(src);
    RealD rsq = Tolerance * Tolerance * ssq;

    FieldD r(src._grid);

    std::cout << std::setprecision(4) << std::scientific;
    std::cout << GridLogIterative << "MPFGMRES: guess " << guess << std::endl;
    std::cout << GridLogIterative << "MPFGMRES: src " << ssq << std::endl;

    PrecTimer.Reset();
    MatrixTimer.Reset();
    LinalgTimer.Reset();
    QrTimer.Reset();
    CompSolutionTimer.Reset();
    ChangePrecTimer.Reset();

    GridStopWatch SolverTimer;
    SolverTimer.Start();

    IterationCount = 0;

    for (int k=0; k<MaxNumberOfRestarts; k++) {

      cp = outerLoopBody(LinOp, src, psi, rsq);

      // Stopping condition
      if (cp <= rsq) {

        SolverTimer.Stop();

        // Recompute the residual from scratch to report the true value
        LinOp.Op(psi,r);
        axpy(r,-1.0,src,r);

        RealD srcnorm       = sqrt(ssq);
        RealD resnorm       = sqrt(norm2(r));
        RealD true_residual = resnorm / srcnorm;

        std::cout << GridLogMessage << "MPFGMRES: Converged on iteration " << IterationCount
                  << " computed residual " << sqrt(cp / ssq)
                  << " true residual " << true_residual
                  << " target " << Tolerance << std::endl;

        std::cout << GridLogMessage << "MPFGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "MPFGMRES Time elapsed: Precon " << PrecTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "MPFGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "MPFGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "MPFGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "MPFGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
        std::cout << GridLogMessage << "MPFGMRES Time elapsed: PrecChange " << ChangePrecTimer.Elapsed() << std::endl;
        return;
      }
    }

    std::cout << GridLogMessage << "MPFGMRES did NOT converge" << std::endl;

    if (ErrorOnNoConverge)
      assert(0);
  }

  /// One restart cycle.  Computes the residual of the current psi, runs up
  /// to RestartLength preconditioned Arnoldi/QR steps and updates psi.
  /// @return the squared residual estimate at the end of the cycle
  RealD outerLoopBody(LinearOperatorBase<FieldD> &LinOp, const FieldD &src, FieldD &psi, RealD rsq) {

    RealD cp = 0;

    FieldD w(src._grid);
    FieldD r(src._grid);

    MatrixTimer.Start();
    LinOp.Op(psi, w);
    MatrixTimer.Stop();

    LinalgTimer.Start();
    r = src - w;

    RealD rnorm2 = norm2(r);

    // If psi already satisfies the target there is nothing to do.  This also
    // protects the normalisation below: a vanishing residual would otherwise
    // give 1/0 and fill v[0], and subsequently psi, with NaNs.
    if (rnorm2 <= rsq) {
      LinalgTimer.Stop();
      return rnorm2;
    }

    // these should probably be made class members so that they are only allocated once, not in every restart
    std::vector<FieldD> v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero;
    std::vector<FieldD> z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero;

    gamma[0] = sqrt(rnorm2);

    v[0] = (1. / gamma[0]) * r;
    LinalgTimer.Stop();

    for (int i=0; i<RestartLength; i++) {

      IterationCount++;

      arnoldiStep(LinOp, v, z, w, i);

      qrUpdate(i);

      // |gamma[i+1]|^2 is used as the squared residual estimate
      cp = std::norm(gamma[i+1]);

      std::cout << GridLogIterative << "MPFGMRES: Iteration " << IterationCount
                << " residual " << cp << " target " << rsq << std::endl;

      // End of cycle, iteration budget exhausted, or converged
      if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {

        computeSolution(z, psi, i);

        return cp;
      }
    }

    assert(0); // Never reached
    return cp;
  }

  /// Precondition v[iter] in single precision into z[iter], apply LinOp to
  /// z[iter], orthogonalise the result against v[0..iter] (coefficients go
  /// to row iter of H) and store the normalised remainder in v[iter + 1].
  void arnoldiStep(LinearOperatorBase<FieldD> &LinOp, std::vector<FieldD> &v, std::vector<FieldD> &z, FieldD &w, int iter) {

    FieldF v_f(SinglePrecGrid);
    FieldF z_f(SinglePrecGrid);

    // z[iter] was zeroed by the caller, so this also gives z_f defined
    // (zero) content before it is passed to the preconditioner.
    ChangePrecTimer.Start();
    precisionChange(v_f, v[iter]);
    precisionChange(z_f, z[iter]);
    ChangePrecTimer.Stop();

    PrecTimer.Start();
    Preconditioner(v_f, z_f);
    PrecTimer.Stop();

    ChangePrecTimer.Start();
    precisionChange(z[iter], z_f);
    ChangePrecTimer.Stop();

    MatrixTimer.Start();
    LinOp.Op(z[iter], w);
    MatrixTimer.Stop();

    LinalgTimer.Start();
    for (int i = 0; i <= iter; ++i) {
      H(iter, i) = innerProduct(v[i], w);
      w = w - H(iter, i) * v[i];
    }

    H(iter, iter + 1) = sqrt(norm2(w));
    v[iter + 1] = (1. / H(iter, iter + 1)) * w;
    LinalgTimer.Stop();
  }

  /// Bring row iter of H to triangular form: apply the rotations stored from
  /// earlier iterations, build the rotation that zeroes H(iter, iter + 1),
  /// and apply that rotation to gamma as well.
  void qrUpdate(int iter) {

    QrTimer.Start();
    for (int i = 0; i < iter ; ++i) {
      auto tmp       = -s[i] * H(iter, i) + c[i] * H(iter, i + 1);
      H(iter, i)     = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1);
      H(iter, i + 1) = tmp;
    }

    // Compute new Givens Rotation
    ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
    c[iter]     = H(iter, iter) / nu;
    s[iter]     = H(iter, iter + 1) / nu;

    // Apply new Givens rotation
    H(iter, iter)     = nu;
    H(iter, iter + 1) = 0.;

    gamma[iter + 1] = -s[iter] * gamma[iter];
    gamma[iter]     = std::conj(c[iter]) * gamma[iter];
    QrTimer.Stop();
  }

  /// Back-substitute the triangular system for y[0..iter] and add the
  /// resulting combination of preconditioned vectors z to psi.
  void computeSolution(std::vector<FieldD> const &z, FieldD &psi, int iter) {

    CompSolutionTimer.Start();
    for (int i = iter; i >= 0; i--) {
      y[i] = gamma[i];
      for (int k = i + 1; k <= iter; k++)
        y[i] = y[i] - H(k, i) * y[k];
      y[i] = y[i] / H(i, i);
    }

    for (int i = 0; i <= iter; i++)
      psi = psi + z[i] * y[i];
    CompSolutionTimer.Stop();
  }
};
}
#endif

View File

@ -0,0 +1,45 @@
#pragma once
namespace Grid {
/// Power iteration giving a quick estimate of the largest eigenvalue of an
/// operator through its HermOp() member.
template<class Field> class PowerMethod
{
 public:

  /// Scale v in place by 1/sqrt(norm2(v)).
  /// @return the norm v had before scaling
  template<typename T> static RealD normalise(T& v)
  {
    RealD nn = norm2(v);
    nn = sqrt(nn);
    v = v * (1.0/nn);
    return nn;
  }

  /// Iterate src_n -> HermOp(src_n), starting from src, and return the
  /// Rayleigh quotient <src_n|HermOp|src_n> / <src_n|src_n> once it changes
  /// by less than 1% between successive iterations, or after maxIterEst
  /// iterations, whichever comes first.
  RealD operator()(LinearOperatorBase<Field> &HermOp, const Field &src)
  {
    // quickly get an idea of the largest eigenvalue to more properly normalize the residuum
    RealD evalMaxApprox = 0.0;

    auto src_n = src;
    auto tmp   = src;

    // Not spelled with a leading underscore + capital: such names are
    // reserved for the implementation.
    const int maxIterEst = 50;

    for (int i = 0; i < maxIterEst; i++) {

      normalise(src_n);
      HermOp.HermOp(src_n,tmp);

      RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
      RealD vden = norm2(src_n);
      RealD na   = vnum/vden;

      // Stop on a relative change below 1% or on the last allowed iteration
      if ( (fabs(evalMaxApprox/na - 1.0) < 0.01) || (i == maxIterEst - 1) ) {
        evalMaxApprox = na;
        return evalMaxApprox;
      }

      evalMaxApprox = na;
      std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
      src_n = tmp;
    }

    assert(0); // never reached: the last iteration always returns
    return 0;
  }
};
}

View File

@ -139,8 +139,11 @@ namespace Grid {
MatTimer.Start();
Linop.HermOpAndNorm(psi,Az,zAz,zAAz);
MatTimer.Stop();
LinalgTimer.Start();
r=src-Az;
LinalgTimer.Stop();
/////////////////////
// p = Prec(r)
/////////////////////
@ -152,8 +155,10 @@ namespace Grid {
Linop.HermOp(z,tmp);
MatTimer.Stop();
LinalgTimer.Start();
ttmp=tmp;
tmp=tmp-r;
LinalgTimer.Stop();
/*
std::cout<<GridLogMessage<<r<<std::endl;
@ -166,12 +171,14 @@ namespace Grid {
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
MatTimer.Stop();
LinalgTimer.Start();
//p[0],q[0],qq[0]
p[0]= z;
q[0]= Az;
qq[0]= zAAz;
cp =norm2(r);
LinalgTimer.Stop();
for(int k=0;k<nstep;k++){
@ -181,12 +188,14 @@ namespace Grid {
int peri_k = k %mmax;
int peri_kp= kp%mmax;
LinalgTimer.Start();
rq= real(innerProduct(r,q[peri_k])); // what if rAr not real?
a = rq/qq[peri_k];
axpy(psi,a,p[peri_k],psi);
cp = axpy_norm(r,-a,q[peri_k],r);
cp = axpy_norm(r,-a,q[peri_k],r);
LinalgTimer.Stop();
if((k==nstep-1)||(cp<rsq)){
return cp;
@ -202,6 +211,8 @@ namespace Grid {
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
Linop.HermOp(z,tmp);
MatTimer.Stop();
LinalgTimer.Start();
tmp=tmp-r;
std::cout<<GridLogMessage<< " Preconditioner resid " <<sqrt(norm2(tmp)/norm2(r))<<std::endl;
@ -219,9 +230,9 @@ namespace Grid {
}
qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm
LinalgTimer.Stop();
}
assert(0); // never reached
return cp;
}

View File

@ -87,228 +87,25 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form a Red Black solver calling a Herm solver
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
// Use base class to share code
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Now make the norm reflect extra factor of Mee
template<class Field> class SchurRedBlackStaggeredSolve {
private:
OperatorFunction<Field> & _HermitianRBSolver;
int CBfactorise;
bool subGuess;
public:
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
_HermitianRBSolver(HermitianRBSolver)
{
CBfactorise=0;
subtractGuess(initSubGuess);
};
void subtractGuess(const bool initSubGuess)
{
subGuess = initSubGuess;
}
bool isSubtractGuess(void)
{
return subGuess;
}
template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){
ZeroGuesser<Field> guess;
(*this)(_Matrix,in,out,guess);
}
template<class Matrix, class Guesser>
void operator() (Matrix & _Matrix,const Field &in, Field &out, Guesser &guess){
// FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
Field src_e(grid);
Field src_o(grid);
Field sol_e(grid);
Field sol_o(grid);
Field tmp(grid);
Field Mtmp(grid);
Field resid(fgrid);
std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve " <<std::endl;
pickCheckerboard(Even,src_e,in);
pickCheckerboard(Odd ,src_o,in);
pickCheckerboard(Even,sol_e,out);
pickCheckerboard(Odd ,sol_o,out);
std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve checkerboards picked" <<std::endl;
/////////////////////////////////////////////////////
// src_o = (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
//src_o = tmp; assert(src_o.checkerboard ==Odd);
_Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm.
//////////////////////////////////////////////////////////////
// Call the red-black solver
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl;
guess(src_o, sol_o);
Mtmp = sol_o;
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver called the Mpc solver" <<std::endl;
// Fionn A2A boolean behavioural control
if (subGuess) sol_o = sol_o-Mtmp;
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
///////////////////////////////////////////////////
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver reconstructed other CB" <<std::endl;
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver inserted solution" <<std::endl;
// Verify the unprec residual
if ( ! subGuess ) {
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
} else {
std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
}
}
};
template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form a Red Black solver calling a Herm solver
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackDiagMooeeSolve {
private:
template<class Field> class SchurRedBlackBase {
protected:
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
OperatorFunction<Field> & _HermitianRBSolver;
int CBfactorise;
bool subGuess;
bool useSolnAsInitGuess; // if true user-supplied solution vector is used as initial guess for solver
public:
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver,int cb=0, const bool initSubGuess = false) : _HermitianRBSolver(HermitianRBSolver)
{
CBfactorise=cb;
subtractGuess(initSubGuess);
};
void subtractGuess(const bool initSubGuess)
{
subGuess = initSubGuess;
}
bool isSubtractGuess(void)
{
return subGuess;
}
template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){
ZeroGuesser<Field> guess;
(*this)(_Matrix,in,out,guess);
}
template<class Matrix, class Guesser>
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
// FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
Field src_e(grid);
Field src_o(grid);
Field sol_e(grid);
Field sol_o(grid);
Field tmp(grid);
Field Mtmp(grid);
Field resid(fgrid);
pickCheckerboard(Even,src_e,in);
pickCheckerboard(Odd ,src_o,in);
pickCheckerboard(Even,sol_e,out);
pickCheckerboard(Odd ,sol_o,out);
/////////////////////////////////////////////////////
// src_o = Mdag * (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
// get the right MpcDag
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
//////////////////////////////////////////////////////////////
// Call the red-black solver
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
guess(src_o,sol_o);
Mtmp = sol_o;
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
// Fionn A2A boolean behavioural control
if (subGuess) sol_o = sol_o-Mtmp;
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
///////////////////////////////////////////////////
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
// Verify the unprec residual
if ( ! subGuess ) {
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackDiagMooee solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
} else {
std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
}
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form a Red Black solver calling a Herm solver
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackDiagTwoSolve {
private:
OperatorFunction<Field> & _HermitianRBSolver;
int CBfactorise;
bool subGuess;
public:
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
_HermitianRBSolver(HermitianRBSolver)
SchurRedBlackBase(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false,
const bool _solnAsInitGuess = false) :
_HermitianRBSolver(HermitianRBSolver),
useSolnAsInitGuess(_solnAsInitGuess)
{
CBfactorise = 0;
subtractGuess(initSubGuess);
@ -322,12 +119,90 @@ namespace Grid {
return subGuess;
}
template<class Matrix>
/////////////////////////////////////////////////////////////
// Shared code
/////////////////////////////////////////////////////////////
void operator() (Matrix & _Matrix,const Field &in, Field &out){
ZeroGuesser<Field> guess;
(*this)(_Matrix,in,out,guess);
}
template<class Matrix,class Guesser>
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out)
{
ZeroGuesser<Field> guess;
(*this)(_Matrix,in,out,guess);
}
template<class Guesser>
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)
{
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
int nblock = in.size();
std::vector<Field> src_o(nblock,grid);
std::vector<Field> sol_o(nblock,grid);
std::vector<Field> guess_save;
Field resid(fgrid);
Field tmp(grid);
////////////////////////////////////////////////
// Prepare RedBlack source
////////////////////////////////////////////////
for(int b=0;b<nblock;b++){
RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
}
////////////////////////////////////////////////
// Make the guesses
////////////////////////////////////////////////
if ( subGuess ) guess_save.resize(nblock,grid);
for(int b=0;b<nblock;b++){
if(useSolnAsInitGuess) {
pickCheckerboard(Odd, sol_o[b], out[b]);
} else {
guess(src_o[b],sol_o[b]);
}
if ( subGuess ) {
guess_save[b] = sol_o[b];
}
}
//////////////////////////////////////////////////////////////
// Call the block solver
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlackBase calling the solver for "<<nblock<<" RHS" <<std::endl;
RedBlackSolve(_Matrix,src_o,sol_o);
////////////////////////////////////////////////
// A2A boolean behavioural control & reconstruct other checkerboard
////////////////////////////////////////////////
for(int b=0;b<nblock;b++) {
if (subGuess) sol_o[b] = sol_o[b] - guess_save[b];
///////// Needs even source //////////////
pickCheckerboard(Even,tmp,in[b]);
RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]);
/////////////////////////////////////////////////
// Check unprec residual if possible
/////////////////////////////////////////////////
if ( ! subGuess ) {
_Matrix.M(out[b],resid);
resid = resid-in[b];
RealD ns = norm2(in[b]);
RealD nr = norm2(resid);
std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl;
} else {
std::cout<<GridLogMessage<< "SchurRedBlackBase Guess subtracted after solve["<<b<<"] " << std::endl;
}
}
}
template<class Guesser>
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
// FIXME CGdiagonalMee not implemented virtual function
@ -335,52 +210,42 @@ namespace Grid {
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
Field src_e(grid);
Field src_o(grid);
Field sol_e(grid);
Field sol_o(grid);
Field tmp(grid);
Field Mtmp(grid);
Field resid(fgrid);
Field src_o(grid);
Field src_e(grid);
Field sol_o(grid);
pickCheckerboard(Even,src_e,in);
pickCheckerboard(Odd ,src_o,in);
pickCheckerboard(Even,sol_e,out);
pickCheckerboard(Odd ,sol_o,out);
/////////////////////////////////////////////////////
// src_o = Mdag * (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
////////////////////////////////////////////////
// RedBlack source
////////////////////////////////////////////////
RedBlackSource(_Matrix,in,src_e,src_o);
// get the right MpcDag
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
////////////////////////////////
// Construct the guess
////////////////////////////////
if(useSolnAsInitGuess) {
pickCheckerboard(Odd, sol_o, out);
} else {
guess(src_o,sol_o);
}
Field guess_save(grid);
guess_save = sol_o;
//////////////////////////////////////////////////////////////
// Call the red-black solver
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
// _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
guess(src_o,tmp);
Mtmp = tmp;
_HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
RedBlackSolve(_Matrix,src_o,sol_o);
////////////////////////////////////////////////
// Fionn A2A boolean behavioural control
if (subGuess) tmp = tmp-Mtmp;
_Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);
////////////////////////////////////////////////
if (subGuess) sol_o= sol_o-guess_save;
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
// RedBlack solution needs the even source
///////////////////////////////////////////////////
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
RedBlackSolution(_Matrix,sol_o,src_e,out);
// Verify the unprec residual
if ( ! subGuess ) {
@ -389,68 +254,185 @@ namespace Grid {
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
std::cout<<GridLogMessage << "SchurRedBlackBase solver true unprec resid "<< std::sqrt(nr/ns) << std::endl;
} else {
std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
std::cout << GridLogMessage << "SchurRedBlackBase Guess subtracted after solve." << std::endl;
}
}
/////////////////////////////////////////////////////////////
// Override in derived.
/////////////////////////////////////////////////////////////
virtual void RedBlackSource (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) =0;
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) =0;
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o) =0;
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)=0;
};
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form a Red Black solver calling a Herm solver
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackDiagTwoMixed {
private:
LinearFunction<Field> & _HermitianRBSolver;
int CBfactorise;
bool subGuess;
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Red-black solver specialisation that drives the Hermitian solver with SchurStaggeredOperator.
// It supplies the three hooks used by the red-black solve: preparing the odd-checkerboard source,
// running the checkerboarded solve, and rebuilding the full-grid solution from the odd solution.
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackStaggeredSolve : public SchurRedBlackBase<Field> {
public:
  typedef CheckerBoardedSparseMatrixBase<Field> Matrix;

  // Forwards the solver and both behaviour flags unchanged to SchurRedBlackBase.
  SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false,
                              const bool _solnAsInitGuess = false)
    : SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess,_solnAsInitGuess)
  {
  }

  //////////////////////////////////////////////////////
  // Override RedBlack specialisation
  //////////////////////////////////////////////////////

  // Splits src into its even/odd parts (written to src_e / src_o) and then
  // overwrites src_o with Mooee * (source_o - Moe MeeInv source_e).
  virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
  {
    GridBase *grid = _Matrix.RedBlackGrid();

    Field tmp(grid);
    Field Mtmp(grid);

    pickCheckerboard(Even,src_e,src);
    pickCheckerboard(Odd ,src_o,src);

    /////////////////////////////////////////////////////
    // src_o = (source_o - Moe MeeInv source_e)
    /////////////////////////////////////////////////////
    _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even);
    _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);
    tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);

    _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm.
  }

  // Rebuilds the full-grid solution sol from the odd solution sol_o and the
  // (unmodified) even source src_e_c.
  virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e_c,Field &sol)
  {
    GridBase *grid = _Matrix.RedBlackGrid();

    Field tmp(grid);
    Field sol_e(grid);
    Field src_e(grid);

    ///////////////////////////////////////////////////
    // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
    ///////////////////////////////////////////////////
    _Matrix.Meooe(sol_o,tmp);        assert(   tmp.checkerboard ==Even);
    // The const input is combined straight into the local; no separate copy is needed.
    src_e = src_e_c-tmp;             assert( src_e.checkerboard ==Even);
    _Matrix.MooeeInv(src_e,sol_e);   assert( sol_e.checkerboard ==Even);

    setCheckerboard(sol,sol_e);      assert( sol_e.checkerboard ==Even);
    setCheckerboard(sol,sol_o);      assert( sol_o.checkerboard ==Odd );
  }

  // Single right-hand-side solve on the odd checkerboard.
  virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
  {
    SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
    this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
  }

  // Multiple right-hand-side solve; the vectors are handed to the solver as they are.
  virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
  {
    SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
    this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
  }
};
// Shorter alias for the same class.
template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Site diagonal has Mooee on it.
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Red-black solver specialisation that drives the Hermitian solver with SchurDiagMooeeOperator.
template<class Field> class SchurRedBlackDiagMooeeSolve : public SchurRedBlackBase<Field> {
public:
  typedef CheckerBoardedSparseMatrixBase<Field> Matrix;

  // Forwards the solver and both behaviour flags unchanged to SchurRedBlackBase.
  SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false,
                              const bool _solnAsInitGuess = false)
    : SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess,_solnAsInitGuess) {}

  //////////////////////////////////////////////////////
  // Override RedBlack specialisation
  //////////////////////////////////////////////////////

  // Splits src into its even/odd parts (written to src_e / src_o) and then
  // overwrites src_o with MpcDag applied to (source_o - Moe MeeInv source_e).
  virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
  {
    GridBase *grid = _Matrix.RedBlackGrid();

    Field tmp(grid);
    Field Mtmp(grid);

    pickCheckerboard(Even,src_e,src);
    pickCheckerboard(Odd ,src_o,src);

    /////////////////////////////////////////////////////
    // src_o = Mdag * (source_o - Moe MeeInv source_e)
    /////////////////////////////////////////////////////
    _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even);
    _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);
    tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);

    // get the right MpcDag
    SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
    _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);
  }

  // Rebuilds the full-grid solution sol from the odd solution sol_o and the even source src_e.
  virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
  {
    GridBase *grid = _Matrix.RedBlackGrid();

    Field tmp(grid);
    Field sol_e(grid);
    Field src_e_i(grid);

    ///////////////////////////////////////////////////
    // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
    ///////////////////////////////////////////////////
    _Matrix.Meooe(sol_o,tmp);          assert(     tmp.checkerboard ==Even);
    src_e_i = src_e-tmp;               assert( src_e_i.checkerboard ==Even);
    _Matrix.MooeeInv(src_e_i,sol_e);   assert(   sol_e.checkerboard ==Even);

    setCheckerboard(sol,sol_e);        assert(   sol_e.checkerboard ==Even);
    setCheckerboard(sol,sol_o);        assert(   sol_o.checkerboard ==Odd );
  }

  // Single right-hand-side solve on the odd checkerboard.
  virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
  {
    SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
    this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
  }

  // Multiple right-hand-side solve; the vectors are handed to the solver as they are.
  virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
  {
    SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
    this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
  }
};
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Site diagonal is identity, right preconditioned by Mee^inv
// ( 1 - Meo Moo^inv Moe Mee^inv ) phi = ( 1 - Meo Moo^inv Moe Mee^inv ) Mee psi = eta
//=> psi = MeeInv phi
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackDiagTwoSolve : public SchurRedBlackBase<Field> {
public:
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackDiagTwoMixed(LinearFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
_HermitianRBSolver(HermitianRBSolver)
{
CBfactorise=0;
subtractGuess(initSubGuess);
};
void subtractGuess(const bool initSubGuess)
{
subGuess = initSubGuess;
}
bool isSubtractGuess(void)
{
return subGuess;
}
SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false,
const bool _solnAsInitGuess = false)
: SchurRedBlackBase<Field>(HermitianRBSolver,initSubGuess,_solnAsInitGuess) {};
template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){
ZeroGuesser<Field> guess;
(*this)(_Matrix,in,out,guess);
}
template<class Matrix, class Guesser>
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
// FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
{
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
Field src_e(grid);
Field src_o(grid);
Field sol_e(grid);
Field sol_o(grid);
Field tmp(grid);
Field Mtmp(grid);
Field resid(fgrid);
pickCheckerboard(Even,src_e,in);
pickCheckerboard(Odd ,src_o,in);
pickCheckerboard(Even,sol_e,out);
pickCheckerboard(Odd ,sol_o,out);
pickCheckerboard(Even,src_e,src);
pickCheckerboard(Odd ,src_o,src);
/////////////////////////////////////////////////////
// src_o = Mdag * (source_o - Moe MeeInv source_e)
@ -461,43 +443,44 @@ namespace Grid {
// get the right MpcDag
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
}
//////////////////////////////////////////////////////////////
// Call the red-black solver
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
// _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
// _HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
guess(src_o,tmp);
Mtmp = tmp;
_HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
// Fionn A2A boolean behavioural control
if (subGuess) tmp = tmp-Mtmp;
_Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
{
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
Field sol_o_i(grid);
Field tmp(grid);
Field sol_e(grid);
////////////////////////////////////////////////
// MooeeInv due to preconditioning
////////////////////////////////////////////////
_Matrix.MooeeInv(sol_o,tmp);
sol_o_i = tmp;
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
///////////////////////////////////////////////////
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
_Matrix.Meooe(sol_o_i,tmp); assert( tmp.checkerboard ==Even);
tmp = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(tmp,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
setCheckerboard(sol,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(sol,sol_o_i); assert( sol_o_i.checkerboard ==Odd );
};
// Verify the unprec residual
if ( ! subGuess ) {
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout << GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid " << std::sqrt(nr / ns) << " nr " << nr << " ns " << ns << std::endl;
} else {
std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
}
}
// Single right-hand-side solve using SchurDiagTwoOperator.
// The result is checked to live on the odd checkerboard, matching the other
// RedBlackSolve specialisations in this file.
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
{
  SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
  this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
}
// Multiple right-hand-side variant: wraps the matrix in a SchurDiagTwoOperator
// and passes both vectors straight through to the Hermitian solver.
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
{
  SchurDiagTwoOperator<Matrix,Field> schurOp(_Matrix);
  this->_HermitianRBSolver(schurOp,src_o,sol_o);
}
};
}
#endif

View File

@ -50,15 +50,15 @@ void CartesianCommunicator::Init(int *argc, char ***argv)
assert(0);
}
Grid_quiesce_nodes();
// Never clean up as done once.
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
Grid_quiesce_nodes();
GlobalSharedMemory::Init(communicator_world);
GlobalSharedMemory::SharedMemoryAllocate(
GlobalSharedMemory::MAX_MPI_SHM_BYTES,
GlobalSharedMemory::Hugepages);
Grid_unquiesce_nodes();
}
///////////////////////////////////////////////////////////////////////////
@ -107,8 +107,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
//////////////////////////////////
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
{
_ndimension = processors.size();
_ndimension = processors.size(); assert(_ndimension>=1);
int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
std::vector<int> parent_processor_coor(_ndimension,0);
std::vector<int> parent_processors (_ndimension,1);
@ -124,10 +123,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
// split the communicator
//////////////////////////////////////////////////////////////////////////////////////////////////////
// int Nparent = parent._processors ;
// std::cout << " splitting from communicator "<<parent.communicator <<std::endl;
int Nparent;
MPI_Comm_size(parent.communicator,&Nparent);
// std::cout << " Parent size "<<Nparent <<std::endl;
int childsize=1;
for(int d=0;d<processors.size();d++) {
@ -136,8 +133,6 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
int Nchild = Nparent/childsize;
assert (childsize * Nchild == Nparent);
// std::cout << " child size "<<childsize <<std::endl;
std::vector<int> ccoor(_ndimension); // coor within subcommunicator
std::vector<int> scoor(_ndimension); // coor of split within parent
std::vector<int> ssize(_ndimension); // coor of split within parent

View File

@ -52,7 +52,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
_processors = processors;
_ndimension = processors.size();
_ndimension = processors.size(); assert(_ndimension>=1);
_processor_coor.resize(_ndimension);
// Require 1^N processor grid for fake

View File

@ -103,6 +103,8 @@ class GlobalSharedMemory {
//////////////////////////////////////////////////////////////////////////////////////
static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD
static void OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian
static void OptimalCommunicatorHypercube(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian
static void OptimalCommunicatorSharedMemory(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian
///////////////////////////////////////////////////
// Provide shared memory facilities off comm world
///////////////////////////////////////////////////

View File

@ -132,7 +132,22 @@ int Log2Size(int TwoToPower,int MAXLOG2)
}
// Chooses the rank-layout strategy at run time: if the local hostname matches
// the pattern r<int>i<int>n<int> the hypercube layout is used, otherwise the
// shared-memory layout. A failed hostname lookup falls back to shared memory.
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
{
#ifdef HYPERCUBE
  // NOTE(review): the #else/#endif paired with this guard sit inside the two
  // functions that follow — confirm whether the guard is still wanted now
  // that the choice is made at run time.
  //////////////////////////////////////////////////////////////////////////////
  // Look and see if it looks like an HPE 8600 based on hostname conventions
  //////////////////////////////////////////////////////////////////////////////
  const int namelen = _POSIX_HOST_NAME_MAX;
  // One spare, zero-filled byte: gethostname need not terminate a truncated
  // name, and sscanf below requires a terminated string.
  char name[namelen+1] = {0};
  int R = 0;
  int I = 0;
  int N = 0;
  int nscan = 0;

  // Only parse the buffer when the lookup actually succeeded.
  if ( gethostname(name,namelen) == 0 ) {
    nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
  }

  if(nscan==3) OptimalCommunicatorHypercube(processors,optimal_comm);
  else         OptimalCommunicatorSharedMemory(processors,optimal_comm);
}
void GlobalSharedMemory::OptimalCommunicatorHypercube(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
{
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
////////////////////////////////////////////////////////////////
@ -253,7 +268,9 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
/////////////////////////////////////////////////////////////////
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
assert(ierr==0);
#else
}
void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
{
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
////////////////////////////////////////////////////////////////
@ -306,7 +323,6 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
/////////////////////////////////////////////////////////////////
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
assert(ierr==0);
#endif
}
////////////////////////////////////////////////////////////////////////////////////////////
// SHMGET
@ -337,7 +353,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
int errsv = errno;
printf("Errno %d\n",errsv);
printf("key %d\n",key);
printf("size %lld\n",size);
printf("size %ld\n",size);
printf("flags %d\n",flags);
perror("shmget");
exit(1);
@ -413,7 +429,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
assert(((uint64_t)ptr&0x3F)==0);
close(fd);
WorldShmCommBufs[r] =ptr;
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
// std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
}
_ShmAlloc=1;
_ShmAllocBytes = bytes;
@ -455,7 +471,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
assert(((uint64_t)ptr&0x3F)==0);
close(fd);
WorldShmCommBufs[r] =ptr;
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
// std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
}
_ShmAlloc=1;
_ShmAllocBytes = bytes;
@ -499,7 +515,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#endif
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
// std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
if ( ptr == (void * )MAP_FAILED ) {
perror("failed mmap");
assert(0);

View File

@ -85,7 +85,7 @@ class LatticeTrinaryExpression :public std::pair<Op,std::tuple<T1,T2,T3> >, publ
// Asserts that both operands refer to the very same grid object (pointer
// identity). The string in the assert only serves to label the failure output.
inline void conformable(GridBase *lhs,GridBase *rhs)
{
  assert((lhs == rhs) && " conformable check pointers mismatch ");
}
template<class vobj>

View File

@ -392,14 +392,10 @@ namespace Grid {
// Seeds the generator from an arbitrary string: the string is turned into a
// list of integer seeds via GridChecksum::sha256_seeds, the string and the
// digest are logged once, and the seeds are passed to SeedFixedIntegers.
void SeedUniqueString(const std::string &s){
  std::vector<int> seeds = GridChecksum::sha256_seeds(s);

  std::cout << GridLogMessage << "Intialising parallel RNG with unique string '"
            << s << "'" << std::endl;
  // Single digest line, formatted by the checksum helper rather than by
  // concatenating unpadded hex words.
  std::cout << GridLogMessage << "Seed SHA256: " << GridChecksum::sha256_string(seeds) << std::endl;

  SeedFixedIntegers(seeds);
}
void SeedFixedIntegers(const std::vector<int> &seeds){

View File

@ -464,8 +464,10 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
assert(orthog>=0);
for(int d=0;d<nh;d++){
assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
if ( d!=orthog ) {
assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
}
}
// the above should guarantee that the operations are local
@ -485,7 +487,7 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
template<class vobj>
void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{
typedef typename vobj::scalar_object sobj;
@ -499,8 +501,10 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slic
assert(orthog>=0);
for(int d=0;d<nh;d++){
assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
if ( d!=orthog ) {
assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
}
}
// the above should guarantee that the operations are local

View File

@ -59,6 +59,7 @@ void GridLogTimestamp(int on){
}
Colours GridLogColours(0);
GridLogger GridLogMG (1, "MG" , GridLogColours, "NORMAL");
GridLogger GridLogIRL (1, "IRL" , GridLogColours, "NORMAL");
GridLogger GridLogSolver (1, "Solver", GridLogColours, "NORMAL");
GridLogger GridLogError (1, "Error" , GridLogColours, "RED");
@ -76,19 +77,18 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
GridLogIterative.Active(0);
GridLogDebug.Active(0);
GridLogPerformance.Active(0);
GridLogIntegrator.Active(0);
GridLogIntegrator.Active(1);
GridLogColours.Active(0);
for (int i = 0; i < logstreams.size(); i++) {
if (logstreams[i] == std::string("Error")) GridLogError.Active(1);
if (logstreams[i] == std::string("Warning")) GridLogWarning.Active(1);
if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0);
if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1);
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
if (logstreams[i] == std::string("Performance"))
GridLogPerformance.Active(1);
if (logstreams[i] == std::string("Integrator")) GridLogIntegrator.Active(1);
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);
if (logstreams[i] == std::string("Error")) GridLogError.Active(1);
if (logstreams[i] == std::string("Warning")) GridLogWarning.Active(1);
if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0);
if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1);
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1);
if (logstreams[i] == std::string("Integrator")) GridLogIntegrator.Active(1);
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);
}
}

View File

@ -146,9 +146,11 @@ public:
if ( log.timestamp ) {
log.StopWatch->Stop();
GridTime now = log.StopWatch->Elapsed();
if ( log.timing_mode==1 ) log.StopWatch->Reset();
log.StopWatch->Start();
stream << log.evidence()<< std::setw(6)<<now << log.background() << " : " ;
stream << log.evidence()
<< now << log.background() << " : " ;
}
stream << log.colour();
return stream;
@ -167,6 +169,7 @@ public:
void GridLogConfigure(std::vector<std::string> &logstreams);
extern GridLogger GridLogMG;
extern GridLogger GridLogIRL;
extern GridLogger GridLogSolver;
extern GridLogger GridLogError;

View File

@ -0,0 +1,3 @@
#include <Grid/GridCore.h>
// Storage and default for the BinaryIO write-retry setting.
// The lattice writer enables its read-back checksum test only when this is >= 0
// and uses max(0, value) as the number of extra write attempts; the default of
// -1 therefore leaves the read-back test switched off.
int Grid::BinaryIO::latticeWriteMaxRetry = -1;

View File

@ -81,6 +81,7 @@ inline void removeWhitespace(std::string &key)
///////////////////////////////////////////////////////////////////////////////////////////////////
class BinaryIO {
public:
static int latticeWriteMaxRetry;
/////////////////////////////////////////////////////////////////////////////
// more byte manipulation helpers
@ -209,10 +210,10 @@ PARALLEL_CRITICAL
static inline void le32toh_v(void *file_object,uint64_t bytes)
{
uint32_t *fp = (uint32_t *)file_object;
uint32_t f;
uint64_t count = bytes/sizeof(uint32_t);
parallel_for(uint64_t i=0;i<count;i++){
uint32_t f;
f = fp[i];
// got network order and the network to host
f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
@ -234,10 +235,9 @@ PARALLEL_CRITICAL
static inline void le64toh_v(void *file_object,uint64_t bytes)
{
uint64_t *fp = (uint64_t *)file_object;
uint64_t f,g;
uint64_t count = bytes/sizeof(uint64_t);
parallel_for(uint64_t i=0;i<count;i++){
uint64_t f,g;
f = fp[i];
// got network order and the network to host
g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
@ -348,7 +348,8 @@ PARALLEL_CRITICAL
int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64"));
assert(ieee64||ieee32|ieee64big||ieee32big);
assert((ieee64+ieee32+ieee64big+ieee32big)==1);
//////////////////////////////////////////////////////////////////////////////
// Do the I/O
//////////////////////////////////////////////////////////////////////////////
@ -370,7 +371,7 @@ PARALLEL_CRITICAL
#endif
} else {
std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : "
<< iodata.size() * sizeof(fobj) << " bytes" << std::endl;
<< iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
std::ifstream fin;
fin.open(file, std::ios::binary | std::ios::in);
if (control & BINARYIO_MASTER_APPEND)
@ -582,7 +583,9 @@ PARALLEL_CRITICAL
typedef typename vobj::scalar_object sobj;
typedef typename vobj::Realified::scalar_type word; word w=0;
GridBase *grid = Umu._grid;
uint64_t lsites = grid->lSites();
uint64_t lsites = grid->lSites(), offsetCopy = offset;
int attemptsLeft = std::max(0, BinaryIO::latticeWriteMaxRetry);
bool checkWrite = (BinaryIO::latticeWriteMaxRetry >= 0);
std::vector<sobj> scalardata(lsites);
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
@ -597,9 +600,36 @@ PARALLEL_CRITICAL
grid->Barrier();
timer.Stop();
while (attemptsLeft >= 0)
{
grid->Barrier();
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
nersc_csum,scidac_csuma,scidac_csumb);
if (checkWrite)
{
std::vector<fobj> ckiodata(lsites);
uint32_t cknersc_csum, ckscidac_csuma, ckscidac_csumb;
uint64_t ckoffset = offsetCopy;
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
nersc_csum,scidac_csuma,scidac_csumb);
std::cout << GridLogMessage << "writeLatticeObject: read back object" << std::endl;
grid->Barrier();
IOobject(w,grid,ckiodata,file,ckoffset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
cknersc_csum,ckscidac_csuma,ckscidac_csumb);
if ((cknersc_csum != nersc_csum) or (ckscidac_csuma != scidac_csuma) or (ckscidac_csumb != scidac_csumb))
{
std::cout << GridLogMessage << "writeLatticeObject: read test checksum failure, re-writing (" << attemptsLeft << " attempt(s) remaining)" << std::endl;
offset = offsetCopy;
parallel_for(uint64_t x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
}
else
{
std::cout << GridLogMessage << "writeLatticeObject: read test checksum correct" << std::endl;
break;
}
}
attemptsLeft--;
}
std::cout<<GridLogMessage<<"writeLatticeObject: unvectorize overhead "<<timer.Elapsed() <<std::endl;
}
@ -725,5 +755,6 @@ PARALLEL_CRITICAL
std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl;
}
};
}
#endif

View File

@ -46,6 +46,12 @@ extern "C" {
namespace Grid {
namespace QCD {
// Record-type / tag string used for the field-norm metadata record.
#define GRID_FIELD_NORM "FieldNormMetaData"
// Symmetric relative difference between the stored norm and a recomputed one:
//   0.5 * |a - b| / (a + b)   with a = FieldNormMetaData_.norm2, b = n2ck.
// Body and arguments are fully parenthesised so the macro behaves as a single
// value inside any surrounding expression. Both norms being zero yields 0/0;
// callers are expected to guard that case.
#define GRID_FIELD_NORM_CALC(FieldNormMetaData_, n2ck) \
  (0.5*fabs((FieldNormMetaData_).norm2 - (n2ck))/((FieldNormMetaData_).norm2 + (n2ck)))
// Asserts that the relative difference is below 1.0e-5.
// The trailing ';' is part of the macro and is kept so existing call sites,
// with or without their own ';', continue to compile.
#define GRID_FIELD_NORM_CHECK(FieldNormMetaData_, n2ck) \
  assert(GRID_FIELD_NORM_CALC(FieldNormMetaData_, n2ck) < 1.0e-5);
/////////////////////////////////
// Encode word types as strings
/////////////////////////////////
@ -205,6 +211,7 @@ class GridLimeReader : public BinaryIO {
{
typedef typename vobj::scalar_object sobj;
scidacChecksum scidacChecksum_;
FieldNormMetaData FieldNormMetaData_;
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
std::string format = getFormatString<vobj>();
@ -233,20 +240,52 @@ class GridLimeReader : public BinaryIO {
// std::cout << " ReadLatticeObject from offset "<<offset << std::endl;
BinarySimpleMunger<sobj,sobj> munge;
BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
std::cout << GridLogMessage << "SciDAC checksum A " << std::hex << scidac_csuma << std::dec << std::endl;
std::cout << GridLogMessage << "SciDAC checksum B " << std::hex << scidac_csumb << std::dec << std::endl;
/////////////////////////////////////////////
// Insist checksum is next record
/////////////////////////////////////////////
readLimeObject(scidacChecksum_,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
readScidacChecksum(scidacChecksum_,FieldNormMetaData_);
/////////////////////////////////////////////
// Verify checksums
/////////////////////////////////////////////
if(FieldNormMetaData_.norm2 != 0.0){
RealD n2ck = norm2(field);
std::cout << GridLogMessage << "Field norm: metadata= " << FieldNormMetaData_.norm2
<< " / field= " << n2ck << " / rdiff= " << GRID_FIELD_NORM_CALC(FieldNormMetaData_,n2ck) << std::endl;
GRID_FIELD_NORM_CHECK(FieldNormMetaData_,n2ck);
}
assert(scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb)==1);
// find out if next field is a GridFieldNorm
return;
}
}
}
// Advances through the following LIME records until the SciDAC checksum record
// is found and parsed into scidacChecksum_. If a field-norm record is met on
// the way it is parsed into FieldNormMetaData_; otherwise its norm2 stays 0.0.
// Reaching the end of the records without finding a checksum trips assert(0).
void readScidacChecksum(scidacChecksum &scidacChecksum_,
                        FieldNormMetaData &FieldNormMetaData_)
{
  FieldNormMetaData_.norm2 =0.0;
  std::string scidac_str(SCIDAC_CHECKSUM);
  std::string field_norm_str(GRID_FIELD_NORM);

  while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
    uint64_t nbytes = limeReaderBytes(LimeR); // size of this record's payload
    std::vector<char> xmlc(nbytes+1,'\0');    // one extra byte keeps the buffer terminated
    limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);

    // Record types are compared by prefix, as before.
    const char *record_type = limeReaderType(LimeR);
    const bool is_field_norm = !strncmp(record_type, field_norm_str.c_str(), field_norm_str.size());
    const bool is_checksum   = !strncmp(record_type, scidac_str.c_str(),     scidac_str.size());

    // Records of any other type are never handed to the XML parser.
    if ( !(is_field_norm || is_checksum) ) continue;

    std::string xmlstring = std::string(&xmlc[0]);
    XmlReader RD(xmlstring, true, "");

    if ( is_field_norm ) {
      read(RD,field_norm_str,FieldNormMetaData_);
    }
    if ( is_checksum ) {
      read(RD,std::string("scidacChecksum"),scidacChecksum_);
      return;
    }
  }
  assert(0); // no checksum record found
}
////////////////////////////////////////////
// Read a generic serialisable object
////////////////////////////////////////////
@ -265,7 +304,7 @@ class GridLimeReader : public BinaryIO {
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
// std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl;
xmlstring = std::string(&xmlc[0]);
xmlstring = std::string(&xmlc[0]);
return;
}
@ -279,8 +318,8 @@ class GridLimeReader : public BinaryIO {
std::string xmlstring;
readLimeObject(xmlstring, record_name);
XmlReader RD(xmlstring, true, "");
read(RD,object_name,object);
XmlReader RD(xmlstring, true, "");
read(RD,object_name,object);
}
};
@ -389,6 +428,8 @@ class GridLimeWriter : public BinaryIO
GridBase *grid = field._grid;
assert(boss_node == field._grid->IsBoss() );
FieldNormMetaData FNMD; FNMD.norm2 = norm2(field);
////////////////////////////////////////////
// Create record header
////////////////////////////////////////////
@ -447,6 +488,7 @@ class GridLimeWriter : public BinaryIO
checksum.suma= streama.str();
checksum.sumb= streamb.str();
if ( boss_node ) {
writeLimeObject(0,0,FNMD,std::string(GRID_FIELD_NORM),std::string(GRID_FIELD_NORM));
writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
}
}
@ -624,6 +666,12 @@ class IldgWriter : public ScidacWriter {
assert(header.nd==4);
assert(header.nd==header.dimension.size());
//////////////////////////////////////////////////////////////////////////////
// Field norm tests
//////////////////////////////////////////////////////////////////////////////
FieldNormMetaData FieldNormMetaData_;
FieldNormMetaData_.norm2 = norm2(Umu);
//////////////////////////////////////////////////////////////////////////////
// Fill the USQCD info field
//////////////////////////////////////////////////////////////////////////////
@ -632,11 +680,12 @@ class IldgWriter : public ScidacWriter {
info.plaq = header.plaquette;
info.linktr = header.link_trace;
std::cout << GridLogMessage << " Writing config; IldgIO "<<std::endl;
// std::cout << GridLogMessage << " Writing config; IldgIO n2 "<< FieldNormMetaData_.norm2<<std::endl;
//////////////////////////////////////////////
// Fill the Lime file record by record
//////////////////////////////////////////////
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
writeLimeObject(0,0,FieldNormMetaData_,FieldNormMetaData_.SerialisableClassName(),std::string(GRID_FIELD_NORM));
writeLimeObject(0,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
writeLimeObject(0,1,info,info.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
writeLimeObject(1,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
@ -679,6 +728,7 @@ class IldgReader : public GridLimeReader {
std::string ildgLFN_ ;
scidacChecksum scidacChecksum_;
usqcdInfo usqcdInfo_ ;
FieldNormMetaData FieldNormMetaData_;
// track what we read from file
int found_ildgFormat =0;
@ -687,7 +737,7 @@ class IldgReader : public GridLimeReader {
int found_usqcdInfo =0;
int found_ildgBinary =0;
int found_FieldMetaData =0;
int found_FieldNormMetaData =0;
uint32_t nersc_csum;
uint32_t scidac_csuma;
uint32_t scidac_csumb;
@ -721,7 +771,7 @@ class IldgReader : public GridLimeReader {
//////////////////////////////////
// ILDG format record
std::string xmlstring(&xmlc[0]);
std::string xmlstring(&xmlc[0]);
if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) {
XmlReader RD(xmlstring, true, "");
@ -774,11 +824,17 @@ class IldgReader : public GridLimeReader {
found_scidacChecksum = 1;
}
if ( !strncmp(limeReaderType(LimeR), GRID_FIELD_NORM,strlen(GRID_FIELD_NORM)) ) {
XmlReader RD(xmlstring, true, "");
read(RD,GRID_FIELD_NORM,FieldNormMetaData_);
found_FieldNormMetaData = 1;
}
} else {
/////////////////////////////////
// Binary data
/////////////////////////////////
std::cout << GridLogMessage << "ILDG Binary record found : " ILDG_BINARY_DATA << std::endl;
// std::cout << GridLogMessage << "ILDG Binary record found : " ILDG_BINARY_DATA << std::endl;
uint64_t offset= ftello(File);
if ( format == std::string("IEEE64BIG") ) {
GaugeSimpleMunger<dobj, sobj> munge;
@ -845,6 +901,13 @@ class IldgReader : public GridLimeReader {
////////////////////////////////////////////////////////////
// Really really want to mandate a scidac checksum
////////////////////////////////////////////////////////////
if ( found_FieldNormMetaData ) {
RealD nn = norm2(Umu);
GRID_FIELD_NORM_CHECK(FieldNormMetaData_,nn);
std::cout << GridLogMessage<<"FieldNormMetaData matches " << std::endl;
} else {
std::cout << GridLogWarning<<"FieldNormMetaData not found. " << std::endl;
}
if ( found_scidacChecksum ) {
FieldMetaData_.scidac_checksuma = stoull(scidacChecksum_.suma,0,16);
FieldMetaData_.scidac_checksumb = stoull(scidacChecksum_.sumb,0,16);

View File

@ -56,6 +56,10 @@ namespace Grid {
////////////////////////////////////////////////////////////////////////////////
// header specification/interpretation
////////////////////////////////////////////////////////////////////////////////
// Serialisable record with a single member, norm2 (double): the squared norm
// of a field. It is written alongside field data and compared against a
// freshly computed norm2() when the field is read back.
class FieldNormMetaData : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(FieldNormMetaData, double, norm2);
};
class FieldMetaData : Serializable {
public:

View File

@ -49,21 +49,39 @@ inline double usecond(void) {
typedef std::chrono::system_clock GridClock;
typedef std::chrono::time_point<GridClock> GridTimePoint;
typedef std::chrono::milliseconds GridMillisecs;
typedef std::chrono::microseconds GridTime;
typedef std::chrono::microseconds GridUsecs;
inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milliseconds & time)
typedef std::chrono::seconds GridSecs;
typedef std::chrono::milliseconds GridMillisecs;
typedef std::chrono::microseconds GridUsecs;
typedef std::chrono::microseconds GridTime;
inline std::ostream& operator<< (std::ostream & stream, const GridSecs & time)
{
stream << time.count()<<" ms";
stream << time.count()<<" s";
return stream;
}
inline std::ostream& operator<< (std::ostream & stream, const std::chrono::microseconds & time)
inline std::ostream& operator<< (std::ostream & stream, const GridMillisecs & now)
{
stream << time.count()<<" usec";
GridSecs second(1);
auto secs = now/second ;
auto subseconds = now%second ;
auto fill = stream.fill();
stream << secs<<"."<<std::setw(3)<<std::setfill('0')<<subseconds.count()<<" s";
stream.fill(fill);
return stream;
}
inline std::ostream& operator<< (std::ostream & stream, const GridUsecs & now)
{
GridSecs second(1);
auto seconds = now/second ;
auto subseconds = now%second ;
auto fill = stream.fill();
stream << seconds<<"."<<std::setw(6)<<std::setfill('0')<<subseconds.count()<<" s";
stream.fill(fill);
return stream;
}
class GridStopWatch {
private:
bool running;

View File

@ -90,17 +90,20 @@ namespace QCD {
// That probably makes for GridRedBlack4dCartesian grid.
// s,sp,c,spc,lc
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
template<typename vtype> using iSpinColourSpinColourMatrix = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >;
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >;
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
@ -127,10 +130,28 @@ namespace QCD {
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
typedef iSpinColourMatrix<ComplexF > SpinColourMatrixF;
typedef iSpinColourMatrix<ComplexD > SpinColourMatrixD;
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
typedef iSpinColourMatrix<vComplexF> vSpinColourMatrixF;
typedef iSpinColourMatrix<vComplexD> vSpinColourMatrixD;
// SpinColourSpinColour matrix
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
// SpinColourSpinColour matrix
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
// LorentzColour
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
@ -229,6 +250,9 @@ namespace QCD {
typedef Lattice<vSpinColourMatrixF> LatticeSpinColourMatrixF;
typedef Lattice<vSpinColourMatrixD> LatticeSpinColourMatrixD;
typedef Lattice<vSpinColourSpinColourMatrix> LatticeSpinColourSpinColourMatrix;
typedef Lattice<vSpinColourSpinColourMatrixF> LatticeSpinColourSpinColourMatrixF;
typedef Lattice<vSpinColourSpinColourMatrixD> LatticeSpinColourSpinColourMatrixD;
typedef Lattice<vLorentzColourMatrix> LatticeLorentzColourMatrix;
typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;

View File

@ -44,12 +44,15 @@ namespace QCD {
struct WilsonImplParams {
bool overlapCommsCompute;
std::vector<Real> twist_n_2pi_L;
std::vector<Complex> boundary_phases;
WilsonImplParams() : overlapCommsCompute(false) {
boundary_phases.resize(Nd, 1.0);
twist_n_2pi_L.resize(Nd, 0.0);
};
WilsonImplParams(const std::vector<Complex> phi)
: boundary_phases(phi), overlapCommsCompute(false) {}
WilsonImplParams(const std::vector<Complex> phi) : boundary_phases(phi), overlapCommsCompute(false) {
twist_n_2pi_L.resize(Nd, 0.0);
}
};
struct StaggeredImplParams {
@ -63,7 +66,8 @@ namespace QCD {
int, MaxIter,
RealD, tolerance,
int, degree,
int, precision);
int, precision,
int, BoundsCheckFreq);
// MaxIter and tolerance, vectors??
@ -73,13 +77,15 @@ namespace QCD {
int _maxit = 1000,
RealD tol = 1.0e-8,
int _degree = 10,
int _precision = 64)
int _precision = 64,
int _BoundsCheckFreq=20)
: lo(_lo),
hi(_hi),
MaxIter(_maxit),
tolerance(tol),
degree(_degree),
precision(_precision){};
precision(_precision),
BoundsCheckFreq(_BoundsCheckFreq){};
};

View File

@ -68,6 +68,26 @@ void CayleyFermion5D<Impl>::ExportPhysicalFermionSolution(const FermionField &so
ExtractSlice(exported4d, tmp, 0, 0);
}
// chi = P psi.
// chi is cleared and then, for every slice index s in [0,Ls), receives
//   axpby_ssp_pminus applied to slice s of psi, and
//   axpby_ssp_pplus  applied to slice (s+1) mod Ls of psi,
// both accumulated with unit coefficients into slice s of chi.
template<class Impl>
void CayleyFermion5D<Impl>::P(const FermionField &psi, FermionField &chi)
{
  const int Ls = this->Ls;
  chi = zero;   // the loop accumulates into chi, so it has to start from zero
  for (int slice = 0; slice < Ls; ++slice) {
    const int next = (slice + 1) % Ls;   // cyclic successor of this slice
    axpby_ssp_pminus(chi, 1.0, chi, 1.0, psi, slice, slice);
    axpby_ssp_pplus (chi, 1.0, chi, 1.0, psi, slice, next);
  }
}
// chi = Pdag psi.
// chi is cleared and then, for every slice index s in [0,Ls), receives
//   axpby_ssp_pminus applied to slice s of psi, and
//   axpby_ssp_pplus  applied to slice (s-1) mod Ls of psi,
// both accumulated with unit coefficients into slice s of chi.
template<class Impl>
void CayleyFermion5D<Impl>::Pdag(const FermionField &psi, FermionField &chi)
{
  const int Ls = this->Ls;
  chi = zero;   // the loop accumulates into chi, so it has to start from zero
  for (int slice = 0; slice < Ls; ++slice) {
    const int prev = (slice - 1 + Ls) % Ls;   // cyclic predecessor; +Ls keeps the operand non-negative
    axpby_ssp_pminus(chi, 1.0, chi, 1.0, psi, slice, slice);
    axpby_ssp_pplus (chi, 1.0, chi, 1.0, psi, slice, prev);
  }
}
template<class Impl>
void CayleyFermion5D<Impl>::ExportPhysicalFermionSource(const FermionField &solution5d,FermionField &exported4d)
{
int Ls = this->Ls;
@ -465,9 +485,13 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
double bpc = b+c;
double bmc = b-c;
_b = b;
_c = c;
_gamma = gamma; // Save the parameters so we can change mass later.
_zolo_hi= zolo_hi;
for(int i=0; i < Ls; i++){
as[i] = 1.0;
omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code
omega[i] = _gamma[i]*_zolo_hi; //NB reciprocal relative to Chroma NEF code
assert(omega[i]!=Coeff_t(0.0));
bs[i] = 0.5*(bpc/omega[i] + bmc);
cs[i] = 0.5*(bpc/omega[i] - bmc);

View File

@ -93,6 +93,17 @@ namespace Grid {
virtual void ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d);
virtual void ImportUnphysicalFermion(const FermionField &solution5d, FermionField &exported4d);
///////////////////////////////////////////////////////////////
// Support for MADWF tricks
///////////////////////////////////////////////////////////////
// Return the mass currently stored in this operator.
RealD Mass(void) { return mass; };
// Store a new mass and rebuild the coefficients by calling
// SetCoefficientsInternal with the saved _zolo_hi, _gamma, _b and _c.
void SetMass(RealD _mass) {
mass=_mass;
SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs
} ;
void P(const FermionField &psi, FermionField &chi);
void Pdag(const FermionField &psi, FermionField &chi);
/////////////////////////////////////////////////////
// Instantiate different versions depending on Impl
/////////////////////////////////////////////////////
@ -139,6 +150,12 @@ namespace Grid {
// protected:
RealD mass;
// Save arguments to SetCoefficientsInternal
std::vector<Coeff_t> _gamma;
RealD _zolo_hi;
RealD _b;
RealD _c;
// Cayley form Moebius (tanh and zolotarev)
std::vector<Coeff_t> omega;
std::vector<Coeff_t> bs; // S dependent coeffs

View File

@ -43,7 +43,7 @@ namespace Grid {
INHERIT_IMPL_TYPES(Impl);
public:
void FreePropagator(const FermionField &in,FermionField &out,RealD mass, std::vector<double> twist, bool fiveD) {
void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary, std::vector<double> twist, bool fiveD) {
FermionField in_k(in._grid);
FermionField prop_k(in._grid);
@ -53,17 +53,22 @@ namespace Grid {
ComplexField coor(in._grid);
ComplexField ph(in._grid); ph = zero;
FermionField in_buf(in._grid); in_buf = zero;
Complex ci(0.0,1.0);
Scalar ci(0.0,1.0);
assert(twist.size() == Nd);//check that twist is Nd
assert(boundary.size() == Nd);//check that boundary conditions is Nd
int shift = 0;
if(fiveD) shift = 1;
for(unsigned int nu = 0; nu < Nd; nu++)
{
// Shift coordinate lattice index by 1 to account for 5th dimension.
LatticeCoordinate(coor, nu + shift);
ph = ph + twist[nu]*coor*((1./(in._grid->_fdimensions[nu+shift])));
double boundary_phase = ::acos(real(boundary[nu]));
ph = ph + boundary_phase*coor*((1./(in._grid->_fdimensions[nu+shift])));
//momenta for propagator shifted by twist+boundary
twist[nu] = twist[nu] + boundary_phase/((2.0*M_PI));
}
in_buf = exp((Real)(2.0*M_PI)*ci*ph*(-1.0))*in;
in_buf = exp(ci*ph*(-1.0))*in;
if(fiveD){//FFT only on temporal and spatial dimensions
std::vector<int> mask(Nd+1,1); mask[0] = 0;
@ -76,25 +81,28 @@ namespace Grid {
this->MomentumSpacePropagatorHt(prop_k,in_k,mass,twist);
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
}
//phase for boundary condition
out = out * exp((Real)(2.0*M_PI)*ci*ph);
out = out * exp(ci*ph);
};
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<double> twist) {
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist) {
bool fiveD = true; //5d propagator by default
FreePropagator(in,out,mass,twist,fiveD);
FreePropagator(in,out,mass,boundary,twist,fiveD);
};
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass, bool fiveD) {
std::vector<double> twist(Nd,0.0); //default: periodic boundarys in all directions
FreePropagator(in,out,mass,twist,fiveD);
std::vector<Complex> boundary;
for(int i=0;i<Nd;i++) boundary.push_back(1);//default: periodic boundary conditions
FreePropagator(in,out,mass,boundary,twist,fiveD);
};
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) {
bool fiveD = true; //5d propagator by default
std::vector<double> twist(Nd,0.0); //default: periodic boundarys in all directions
FreePropagator(in,out,mass,twist,fiveD);
std::vector<double> twist(Nd,0.0); //default: twist angle 0
std::vector<Complex> boundary;
for(int i=0;i<Nd;i++) boundary.push_back(1); //default: periodic boundary conditions
FreePropagator(in,out,mass,boundary,twist,fiveD);
};
virtual void Instantiatable(void) {};

View File

@ -80,12 +80,24 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
///////////////////////////////////////////////////////////////////////////////
#include <Grid/qcd/action/fermion/g5HermitianLinop.h>
///////////////////////////////////////////////////////////////////////////////
// Fourier accelerated Pauli Villars inverse support
///////////////////////////////////////////////////////////////////////////////
#include <Grid/qcd/action/fermion/WilsonTMFermion5D.h>
////////////////////////////////////////////////////////////////////////////////
// Move this group to a DWF specific tools/algorithms subdir?
////////////////////////////////////////////////////////////////////////////////
#include <Grid/qcd/action/fermion/FourierAcceleratedPV.h>
#include <Grid/qcd/action/fermion/PauliVillarsInverters.h>
#include <Grid/qcd/action/fermion/Reconstruct5Dprop.h>
#include <Grid/qcd/action/fermion/MADWF.h>
////////////////////////////////////////////////////////////////////////////////////////////////////
// More maintainable to maintain the following typedef list centrally, as more "impl" targets
// are added, (e.g. extension for gparity, half precision project in comms etc..)
////////////////////////////////////////////////////////////////////////////////////////////////////
// Cayley 5d
namespace Grid {
namespace QCD {

View File

@ -64,11 +64,6 @@ namespace Grid {
virtual RealD M (const FermionField &in, FermionField &out)=0;
virtual RealD Mdag (const FermionField &in, FermionField &out)=0;
// Query the even even properties to make algorithmic decisions
virtual int ConstEE(void) { return 1; }; // clover returns zero as EE depends on gauge field
virtual int isTrivialEE(void) { return 0; };
virtual RealD Mass(void) {return 0.0;};
// half checkerboard operations
virtual void Meooe (const FermionField &in, FermionField &out)=0;
virtual void MeooeDag (const FermionField &in, FermionField &out)=0;
@ -101,7 +96,7 @@ namespace Grid {
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<double> twist) {
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist) {
FFT theFFT((GridCartesian *) in._grid);
FermionField in_k(in._grid);
@ -111,26 +106,33 @@ namespace Grid {
ComplexField coor(in._grid);
ComplexField ph(in._grid); ph = zero;
FermionField in_buf(in._grid); in_buf = zero;
Complex ci(0.0,1.0);
Scalar ci(0.0,1.0);
assert(twist.size() == Nd);//check that twist is Nd
assert(boundary.size() == Nd);//check that boundary conditions is Nd
for(unsigned int nu = 0; nu < Nd; nu++)
{
LatticeCoordinate(coor, nu);
ph = ph + twist[nu]*coor*((1./(in._grid->_fdimensions[nu])));
double boundary_phase = ::acos(real(boundary[nu]));
ph = ph + boundary_phase*coor*((1./(in._grid->_fdimensions[nu])));
//momenta for propagator shifted by twist+boundary
twist[nu] = twist[nu] + boundary_phase/((2.0*M_PI));
}
in_buf = exp((Real)(2.0*M_PI)*ci*ph*(-1.0))*in;
in_buf = exp(ci*ph*(-1.0))*in;
theFFT.FFT_all_dim(in_k,in_buf,FFT::forward);
this->MomentumSpacePropagator(prop_k,in_k,mass,twist);
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
//phase for boundary condition
out = out * exp((Real)(2.0*M_PI)*ci*ph);
out = out * exp(ci*ph);
};
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) {
std::vector<Complex> boundary;
for(int i=0;i<Nd;i++) boundary.push_back(1);//default: periodic boundary conditions
std::vector<double> twist(Nd,0.0); //default: periodic boundarys in all directions
FreePropagator(in,out,mass,twist);
FreePropagator(in,out,mass,boundary,twist);
};
///////////////////////////////////////////////
@ -141,6 +143,7 @@ namespace Grid {
//////////////////////////////////////////////////////////////////////
// Conserved currents, either contract at sink or insert sequentially.
//////////////////////////////////////////////////////////////////////
virtual void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
@ -153,6 +156,12 @@ namespace Grid {
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx)=0;
// Only reimplemented in Wilson5D
// Default to just a zero correlation function
virtual void ContractJ5q(FermionField &q_in ,ComplexField &J5q) { J5q=zero; };
virtual void ContractJ5q(PropagatorField &q_in,ComplexField &J5q) { J5q=zero; };
///////////////////////////////////////////////
// Physical field import/export
///////////////////////////////////////////////

View File

@ -141,6 +141,7 @@ namespace QCD {
////////////////////////////////////////////////////////////////////////
#define INHERIT_FIMPL_TYPES(Impl)\
typedef Impl Impl_t; \
typedef typename Impl::FermionField FermionField; \
typedef typename Impl::PropagatorField PropagatorField; \
typedef typename Impl::DoubledGaugeField DoubledGaugeField; \
@ -239,16 +240,30 @@ namespace QCD {
GaugeLinkField tmp(GaugeGrid);
Lattice<iScalar<vInteger> > coor(GaugeGrid);
////////////////////////////////////////////////////
// apply any boundary phase or twists
////////////////////////////////////////////////////
for (int mu = 0; mu < Nd; mu++) {
auto pha = Params.boundary_phases[mu];
scalar_type phase( real(pha),imag(pha) );
////////// boundary phase /////////////
auto pha = Params.boundary_phases[mu];
scalar_type phase( real(pha),imag(pha) );
int Lmu = GaugeGrid->GlobalDimensions()[mu] - 1;
int L = GaugeGrid->GlobalDimensions()[mu];
int Lmu = L - 1;
LatticeCoordinate(coor, mu);
U = PeekIndex<LorentzIndex>(Umu, mu);
// apply any twists
RealD theta = Params.twist_n_2pi_L[mu] * 2*M_PI / L;
if ( theta != 0.0) {
scalar_type twphase(::cos(theta),::sin(theta));
U = twphase*U;
std::cout << GridLogMessage << " Twist ["<<mu<<"] "<< Params.twist_n_2pi_L[mu]<< " phase"<<phase <<std::endl;
}
tmp = where(coor == Lmu, phase * U, U);
PokeIndex<LorentzIndex>(Uds, tmp, mu);

View File

@ -0,0 +1,237 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/FourierAcceleratedPV.h
Copyright (C) 2015
Author: Christoph Lehner (lifted with permission by Peter Boyle, brought back to Grid)
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
// Read the coefficients bs and cs of an operator and return them as two real
// numbers, after checking that they are the same for every index and have no
// imaginary part.
//   m  : object exposing indexable containers bs and cs
//   _b : receives the real part of the common bs value
//   _c : receives the real part of the common cs value
// All checks are asserts; a violated check aborts the program.
template<typename M>
void get_real_const_bc(M& m, RealD& _b, RealD& _c) {
  // Preconditions of the element accesses below: element 0 must exist in both
  // containers, and cs is indexed over the whole range of bs.
  assert(m.bs.size() > 0);
  assert(m.cs.size() >= m.bs.size());

  ComplexD b,c;
  b=m.bs[0];
  c=m.cs[0];
  std::cout << GridLogMessage << "b=" << b << ", c=" << c << std::endl;
  // Every further entry has to agree with entry 0.
  for (size_t i=1;i<m.bs.size();i++) {
    assert(m.bs[i] == b);
    assert(m.cs[i] == c);
  }
  // Only purely real coefficients are accepted.
  assert(b.imag() == 0.0);
  assert(c.imag() == 0.0);
  _b = b.real();
  _c = c.real();
}
template<typename Vi, typename M, typename G>
class FourierAcceleratedPV {
public:
ConjugateGradient<Vi> &cg;
M& dwfPV;
G& Umu;
GridCartesian* grid5D;
GridRedBlackCartesian* gridRB5D;
int group_in_s;
FourierAcceleratedPV(M& _dwfPV, G& _Umu, ConjugateGradient<Vi> &_cg, int _group_in_s = 2)
: dwfPV(_dwfPV), Umu(_Umu), cg(_cg), group_in_s(_group_in_s)
{
assert( dwfPV.FermionGrid()->_fdimensions[0] % (2*group_in_s) == 0);
grid5D = QCD::SpaceTimeGrid::makeFiveDimGrid(2*group_in_s, (GridCartesian*)Umu._grid);
gridRB5D = QCD::SpaceTimeGrid::makeFiveDimRedBlackGrid(2*group_in_s, (GridCartesian*)Umu._grid);
}
void rotatePV(const Vi& _src, Vi& dst, bool forward) const {
GridStopWatch gsw1, gsw2;
typedef typename Vi::scalar_type Coeff_t;
int Ls = dst._grid->_fdimensions[0];
Vi _tmp(dst._grid);
double phase = M_PI / (double)Ls;
Coeff_t bzero(0.0,0.0);
FFT theFFT((GridCartesian*)dst._grid);
if (!forward) {
gsw1.Start();
for (int s=0;s<Ls;s++) {
Coeff_t a(::cos(phase*s),-::sin(phase*s));
axpby_ssp(_tmp,a,_src,bzero,_src,s,s);
}
gsw1.Stop();
gsw2.Start();
theFFT.FFT_dim(dst,_tmp,0,FFT::forward);
gsw2.Stop();
} else {
gsw2.Start();
theFFT.FFT_dim(_tmp,_src,0,FFT::backward);
gsw2.Stop();
gsw1.Start();
for (int s=0;s<Ls;s++) {
Coeff_t a(::cos(phase*s),::sin(phase*s));
axpby_ssp(dst,a,_tmp,bzero,_tmp,s,s);
}
gsw1.Stop();
}
std::cout << GridLogMessage << "Timing rotatePV: " << gsw1.Elapsed() << ", " << gsw2.Elapsed() << std::endl;
}
void pvInv(const Vi& _src, Vi& _dst) const {
std::cout << GridLogMessage << "Fourier-Accelerated Outer Pauli Villars"<<std::endl;
typedef typename Vi::scalar_type Coeff_t;
int Ls = _dst._grid->_fdimensions[0];
GridStopWatch gswT;
gswT.Start();
RealD b,c;
get_real_const_bc(dwfPV,b,c);
RealD M5 = dwfPV.M5;
// U(true) Rightinv TMinv U(false) = Minv
Vi _src_diag(_dst._grid);
Vi _src_diag_slice(dwfPV.GaugeGrid());
Vi _dst_diag_slice(dwfPV.GaugeGrid());
Vi _src_diag_slices(grid5D);
Vi _dst_diag_slices(grid5D);
Vi _dst_diag(_dst._grid);
rotatePV(_src,_src_diag,false);
// now do TM solves
Gamma G5(Gamma::Algebra::Gamma5);
GridStopWatch gswA, gswB;
gswA.Start();
typedef typename M::Impl_t Impl;
//WilsonTMFermion<Impl> tm(x.Umu,*x.UGridF,*x.UrbGridF,0.0,0.0,solver_outer.parent.par.wparams_f);
std::vector<RealD> vmass(grid5D->_fdimensions[0],0.0);
std::vector<RealD> vmu(grid5D->_fdimensions[0],0.0);
WilsonTMFermion5D<Impl> tm(Umu,*grid5D,*gridRB5D,
*(GridCartesian*)dwfPV.GaugeGrid(),
*(GridRedBlackCartesian*)dwfPV.GaugeRedBlackGrid(),
vmass,vmu);
//SchurRedBlackDiagTwoSolve<Vi> sol(cg);
SchurRedBlackDiagMooeeSolve<Vi> sol(cg); // same performance as DiagTwo
gswA.Stop();
gswB.Start();
for (int sgroup=0;sgroup<Ls/2/group_in_s;sgroup++) {
for (int sidx=0;sidx<group_in_s;sidx++) {
int s = sgroup*group_in_s + sidx;
int sprime = Ls-s-1;
RealD phase = M_PI / (RealD)Ls * (2.0 * s + 1.0);
RealD cosp = ::cos(phase);
RealD sinp = ::sin(phase);
RealD denom = b*b + c*c + 2.0*b*c*cosp;
RealD mass = -(b*b*M5 + c*(1.0 - cosp + c*M5) + b*(-1.0 + cosp + 2.0*c*cosp*M5))/denom;
RealD mu = (b+c)*sinp/denom;
vmass[2*sidx + 0] = mass;
vmass[2*sidx + 1] = mass;
vmu[2*sidx + 0] = mu;
vmu[2*sidx + 1] = -mu;
}
tm.update(vmass,vmu);
for (int sidx=0;sidx<group_in_s;sidx++) {
int s = sgroup*group_in_s + sidx;
int sprime = Ls-s-1;
ExtractSlice(_src_diag_slice,_src_diag,s,0);
InsertSlice(_src_diag_slice,_src_diag_slices,2*sidx + 0,0);
ExtractSlice(_src_diag_slice,_src_diag,sprime,0);
InsertSlice(_src_diag_slice,_src_diag_slices,2*sidx + 1,0);
}
GridStopWatch gsw;
gsw.Start();
_dst_diag_slices = zero; // zero guess
sol(tm,_src_diag_slices,_dst_diag_slices);
gsw.Stop();
std::cout << GridLogMessage << "Solve[sgroup=" << sgroup << "] completed in " << gsw.Elapsed() << ", " << gswA.Elapsed() << std::endl;
for (int sidx=0;sidx<group_in_s;sidx++) {
int s = sgroup*group_in_s + sidx;
int sprime = Ls-s-1;
RealD phase = M_PI / (RealD)Ls * (2.0 * s + 1.0);
RealD cosp = ::cos(phase);
RealD sinp = ::sin(phase);
// now rotate with inverse of
Coeff_t pA = b + c*cosp;
Coeff_t pB = - Coeff_t(0.0,1.0)*c*sinp;
Coeff_t pABden = pA*pA - pB*pB;
// (pA + pB * G5) * (pA - pB*G5) = (pA^2 - pB^2)
ExtractSlice(_dst_diag_slice,_dst_diag_slices,2*sidx + 0,0);
_dst_diag_slice = (pA/pABden) * _dst_diag_slice - (pB/pABden) * (G5 * _dst_diag_slice);
InsertSlice(_dst_diag_slice,_dst_diag,s,0);
ExtractSlice(_dst_diag_slice,_dst_diag_slices,2*sidx + 1,0);
_dst_diag_slice = (pA/pABden) * _dst_diag_slice + (pB/pABden) * (G5 * _dst_diag_slice);
InsertSlice(_dst_diag_slice,_dst_diag,sprime,0);
}
}
gswB.Stop();
rotatePV(_dst_diag,_dst,true);
gswT.Stop();
std::cout << GridLogMessage << "PV completed in " << gswT.Elapsed() << " (Setup: " << gswA.Elapsed() << ", s-loop: " << gswB.Elapsed() << ")" << std::endl;
}
};
}}

View File

@ -26,7 +26,7 @@ See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <Grid/Grid.h>
namespace Grid {
namespace QCD {

View File

@ -0,0 +1,193 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MADWF.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
// Copy `from` into `to` through precisionChange(to,from).
// This overload is constrained by IfNotSame<Fieldi,Fieldo>; presumably it is
// selected only when the two field types differ (confirm against the trait).
template <class Fieldi, class Fieldo,IfNotSame<Fieldi,Fieldo> X=0>
inline void convert(const Fieldi &from,Fieldo &to)
{
precisionChange(to,from);
}
// Copy `from` into `to` by plain assignment.
// This overload is constrained by IfSame<Fieldi,Fieldo>; presumably it is
// selected only when both field types are identical (confirm against the trait).
template <class Fieldi, class Fieldo,IfSame<Fieldi,Fieldo> X=0>
inline void convert(const Fieldi &from,Fieldo &to)
{
to=from;
}
template<class Matrixo,class Matrixi,class PVinverter,class SchurSolver, class Guesser>
class MADWF
{
private:
typedef typename Matrixo::FermionField FermionFieldo;
typedef typename Matrixi::FermionField FermionFieldi;
PVinverter & PauliVillarsSolvero;// For the outer field
SchurSolver & SchurSolveri; // For the inner approx field
Guesser & Guesseri; // To deflate the inner approx solves
Matrixo & Mato; // Action object for outer
Matrixi & Mati; // Action object for inner
RealD target_resid;
int maxiter;
public:
MADWF(Matrixo &_Mato,
Matrixi &_Mati,
PVinverter &_PauliVillarsSolvero,
SchurSolver &_SchurSolveri,
Guesser & _Guesseri,
RealD resid,
int _maxiter) :
Mato(_Mato),Mati(_Mati),
SchurSolveri(_SchurSolveri),
PauliVillarsSolvero(_PauliVillarsSolvero),Guesseri(_Guesseri)
{
target_resid=resid;
maxiter =_maxiter;
};
// Iteratively solve the outer system for sol5 given the 4d source src4.
//
// Each pass (at most maxiter of them):
//   1. applies the Pauli-Villars solver and Pdag to the current defect and
//      takes slice 0 of the result,
//   2. converts that slice to the inner field type and solves the inner
//      system with SchurSolveri,
//   3. carries slice 0 of the inner answer back to the outer field type and
//      rebuilds a full outer correction B from it,
//   4. accumulates B into sol5 and recomputes defect = b - Mato*sol5.
// Returns as soon as sqrt(|defect|^2/|b|^2) < target_resid; if that never
// happens within maxiter passes it logs a message and hits assert(0).
//
// NOTE(review): the residual divides by norm2(b); a zero source would give a
// NaN residual that can never satisfy the exit test - confirm callers never
// pass a zero source.
void operator() (const FermionFieldo &src4,FermionFieldo &sol5)
{
std::cout << GridLogMessage<< " ************************************************" << std::endl;
std::cout << GridLogMessage<< " MADWF-like algorithm " << std::endl;
std::cout << GridLogMessage<< " ************************************************" << std::endl;
// Work fields living on the gauge (4d) grids of the inner and outer operators
FermionFieldi c0i(Mati.GaugeGrid()); // 4d
FermionFieldi y0i(Mati.GaugeGrid()); // 4d
FermionFieldo c0 (Mato.GaugeGrid()); // 4d
FermionFieldo y0 (Mato.GaugeGrid()); // 4d
// Work fields living on the fermion grid of the outer operator
FermionFieldo A(Mato.FermionGrid()); // Temporary outer
FermionFieldo B(Mato.FermionGrid()); // Temporary outer
FermionFieldo b(Mato.FermionGrid()); // 5d source
FermionFieldo c(Mato.FermionGrid()); // PVinv source; reused so store
FermionFieldo defect(Mato.FermionGrid()); // 5d source
// Work fields living on the fermion grid of the inner operator
FermionFieldi ci(Mati.FermionGrid());
FermionFieldi yi(Mati.FermionGrid());
FermionFieldi xi(Mati.FermionGrid());
FermionFieldi srci(Mati.FermionGrid());
FermionFieldi Ai(Mati.FermionGrid());
// Remember the inner mass: it is temporarily set to 1.0 inside the loop
RealD m=Mati.Mass();
///////////////////////////////////////
//Import source, include Dminus factors
///////////////////////////////////////
Mato.ImportPhysicalFermionSource(src4,b);
std::cout << GridLogMessage << " src4 " <<norm2(src4)<<std::endl;
std::cout << GridLogMessage << " b " <<norm2(b)<<std::endl;
// Start from a zero solution, so the initial defect is the full source
defect = b;
sol5=zero;
for (int i=0;i<maxiter;i++) {
///////////////////////////////////////
// Set up c0 from current defect
///////////////////////////////////////
PauliVillarsSolvero(Mato,defect,A);
Mato.Pdag(A,c);
ExtractSlice(c0, c, 0 , 0);
////////////////////////////////////////////////
// Solve the inner system with surface term c0
////////////////////////////////////////////////
ci = zero;
convert(c0,c0i); // Possible precision change
InsertSlice(c0i,ci,0, 0);
// Dwm P y = Dwm x = D(1) P (c0,0,0,0)^T
Mati.P(ci,Ai);
// Apply the inner operator at mass 1.0, then restore the saved mass
Mati.SetMass(1.0); Mati.M(Ai,srci); Mati.SetMass(m);
SchurSolveri(Mati,srci,xi,Guesseri);
Mati.Pdag(xi,yi);
ExtractSlice(y0i, yi, 0 , 0);
convert(y0i,y0); // Possible precision change
//////////////////////////////////////
// Propagate solution back to outer system
// Build Pdag PV^-1 Dm P [-sol4,c2,c3... cL]
//////////////////////////////////////
// Only slice 0 of c is overwritten; the other slices keep the values
// computed from the defect at the top of this iteration.
c0 = - y0;
InsertSlice(c0, c, 0 , 0);
/////////////////////////////
// Reconstruct the bulk solution Pdag PV^-1 Dm P
/////////////////////////////
Mato.P(c,B);
Mato.M(B,A);
PauliVillarsSolvero(Mato,A,B);
Mato.Pdag(B,A);
//////////////////////////////
// Reinsert surface prop
//////////////////////////////
InsertSlice(y0,A,0,0);
//////////////////////////////
// Convert from y back to x
//////////////////////////////
Mato.P(A,B);
// sol5' = sol5 + M^-1 defect
// = sol5 + M^-1 src - M^-1 M sol5 ...
sol5 = sol5 + B;
std::cout << GridLogMessage << "***************************************" <<std::endl;
std::cout << GridLogMessage << " Sol5 update "<<std::endl;
std::cout << GridLogMessage << "***************************************" <<std::endl;
std::cout << GridLogMessage << " Sol5 now "<<norm2(sol5)<<std::endl;
std::cout << GridLogMessage << " delta "<<norm2(B)<<std::endl;
// New defect = b - M sol5
Mato.M(sol5,A);
defect = b - A;
std::cout << GridLogMessage << " defect "<<norm2(defect)<<std::endl;
// Relative residual of the outer system after this pass
double resid = ::sqrt(norm2(defect) / norm2(b));
std::cout << GridLogMessage << "Residual " << i << ": " << resid << std::endl;
std::cout << GridLogMessage << "***************************************" <<std::endl;
if (resid < target_resid) {
return;
}
}
// Falling out of the loop means the target residual was never reached
std::cout << GridLogMessage << "MADWF : Exceeded maxiter "<<std::endl;
assert(0);
}
};
}}

View File

@ -0,0 +1,95 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/SchurRedBlack.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
template<class Field>
class PauliVillarsSolverUnprec
{
public:
ConjugateGradient<Field> & CG;
PauliVillarsSolverUnprec( ConjugateGradient<Field> &_CG) : CG(_CG){};
template<class Matrix>
void operator() (Matrix &_Matrix,const Field &src,Field &sol)
{
RealD m = _Matrix.Mass();
Field A (_Matrix.FermionGrid());
MdagMLinearOperator<Matrix,Field> HermOp(_Matrix);
_Matrix.SetMass(1.0);
_Matrix.Mdag(src,A);
CG(HermOp,A,sol);
_Matrix.SetMass(m);
};
};
template<class Field,class SchurSolverType>
class PauliVillarsSolverRBprec
{
public:
SchurSolverType & SchurSolver;
PauliVillarsSolverRBprec( SchurSolverType &_SchurSolver) : SchurSolver(_SchurSolver){};
template<class Matrix>
void operator() (Matrix &_Matrix,const Field &src,Field &sol)
{
RealD m = _Matrix.Mass();
Field A (_Matrix.FermionGrid());
_Matrix.SetMass(1.0);
SchurSolver(_Matrix,src,sol);
_Matrix.SetMass(m);
};
};
template<class Field,class GaugeField>
class PauliVillarsSolverFourierAccel
{
public:
GaugeField & Umu;
ConjugateGradient<Field> & CG;
PauliVillarsSolverFourierAccel(GaugeField &_Umu,ConjugateGradient<Field> &_CG) : Umu(_Umu), CG(_CG)
{
};
template<class Matrix>
void operator() (Matrix &_Matrix,const Field &src,Field &sol)
{
FourierAcceleratedPV<Field, Matrix, typename Matrix::GaugeField > faPV(_Matrix,Umu,CG) ;
faPV.pvInv(src,sol);
};
};
}
}

View File

@ -0,0 +1,135 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/SchurRedBlack.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
namespace Grid {
namespace QCD {
// Rebuilds a full 5d field sol5 from a 4d solution sol4 and its 4d source
// src4, using a supplied Pauli-Villars inverter (c.f. the MADWF algorithm).
template<class Field,class PVinverter> class Reconstruct5DfromPhysical {
private:
  PVinverter & PauliVillarsSolver;   // called as PauliVillarsSolver(matrix, src, sol)
public:
  /////////////////////////////////////////////////////
  // First cut works, 10 Oct 2018.
  //
  // Must form a plan to get this into production for Zmobius acceleration
  // of the Mobius exact AMA corrections.
  //
  // TODO : understand absence of contact term in eqns in Hantao's thesis
  //        sol4 is contact term subtracted, but thesis & Brower's paper suggests not.
  //
  // Step 1: Localise PV inverse in a routine. [DONE]
  // Step 2: Schur based PV inverse            [DONE]
  // Step 3: Fourier accelerated PV inverse    [DONE]
  //
  /////////////////////////////////////////////////////
  Reconstruct5DfromPhysical(PVinverter &_PauliVillarsSolver)
    : PauliVillarsSolver(_PauliVillarsSolver)
  {
  }

  // sol = M(src) with the mass of _Matrix temporarily set to 1.0.
  template<class Matrix>
  void PV(Matrix &_Matrix,const Field &src,Field &sol)
  {
    RealD m = _Matrix.Mass();
    _Matrix.SetMass(1.0);
    _Matrix.M(src,sol);
    _Matrix.SetMass(m);
  }

  // sol = Mdag(src) with the mass of _Matrix temporarily set to 1.0.
  template<class Matrix>
  void PVdag(Matrix &_Matrix,const Field &src,Field &sol)
  {
    RealD m = _Matrix.Mass();
    _Matrix.SetMass(1.0);
    _Matrix.Mdag(src,sol);
    _Matrix.SetMass(m);
  }

  // Reconstruct sol5 on the fermion grid of _Matrix.
  //   sol4 : 4d solution (inserted, negated, as slice 0 of the intermediate
  //          vector and re-inserted unchanged before the final P)
  //   src4 : 4d source, imported via ImportPhysicalFermionSource
  //   sol5 : output
  // The unused Matrix::Coeff_t typedef of the earlier version was dropped, so
  // Matrix is no longer required to expose that nested type.
  template<class Matrix>
  void operator() (Matrix & _Matrix,const Field &sol4,const Field &src4, Field &sol5){

    int Ls =  _Matrix.Ls;

    Field psi4(_Matrix.GaugeGrid());
    Field psi(_Matrix.FermionGrid());
    Field A  (_Matrix.FermionGrid());
    Field B  (_Matrix.FermionGrid());
    Field c  (_Matrix.FermionGrid());

    std::cout << GridLogMessage<< " ************************************************" << std::endl;
    std::cout << GridLogMessage<< " Reconstruct5Dprop: c.f. MADWF algorithm " << std::endl;
    std::cout << GridLogMessage<< " ************************************************" << std::endl;

    ///////////////////////////////////////
    //Import source, include Dminus factors
    ///////////////////////////////////////
    _Matrix.ImportPhysicalFermionSource(src4,B);

    ///////////////////////////////////////
    // Set up c from src4
    ///////////////////////////////////////
    PauliVillarsSolver(_Matrix,B,A);
    _Matrix.Pdag(A,c);

    //////////////////////////////////////
    // Build Pdag PV^-1 Dm P [-sol4,c2,c3... cL]
    //////////////////////////////////////
    psi4 = - sol4;
    InsertSlice(psi4, psi, 0   , 0);
    // Slices 1..Ls-1 are copied across from c one at a time, reusing psi4
    // as the 4d staging buffer.
    for (int s=1;s<Ls;s++) {
      ExtractSlice(psi4,c,s,0);
      InsertSlice(psi4,psi,s,0);
    }

    /////////////////////////////
    // Pdag PV^-1 Dm P
    /////////////////////////////
    _Matrix.P(psi,B);
    _Matrix.M(B,A);
    PauliVillarsSolver(_Matrix,A,B);
    _Matrix.Pdag(B,A);

    //////////////////////////////
    // Reinsert surface prop
    //////////////////////////////
    InsertSlice(sol4,A,0,0);

    //////////////////////////////
    // Convert from y back to x
    //////////////////////////////
    _Matrix.P(A,sol5);
  }
};
}
}

View File

@ -26,11 +26,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <Grid/Grid.h>
#ifdef AVX512
#include <simd/Intel512common.h>
#include <simd/Intel512avx.h>
#include <Grid/simd/Intel512common.h>
#include <Grid/simd/Intel512avx.h>
#endif
// Interleave operations from two directions
@ -679,7 +679,7 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
gauge3 =(uint64_t)&UU._odata[sU]( T );
// This is the single precision 5th direction vectorised kernel
#include <simd/Intel512single.h>
#include <Grid/simd/Intel512single.h>
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs, int sU,
@ -732,7 +732,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
}
#include <simd/Intel512double.h>
#include <Grid/simd/Intel512double.h>
template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs, int sU,
@ -816,7 +816,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
// This is the single precision 5th direction vectorised kernel
#include <simd/Intel512single.h>
#include <Grid/simd/Intel512single.h>
template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs, int sU,
@ -884,7 +884,7 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
#endif
}
#include <simd/Intel512double.h>
#include <Grid/simd/Intel512double.h>
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &UUU,
SiteSpinor *buf, int LLs, int sU,

View File

@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
#include <Grid/Grid.h>
#define LOAD_CHI(b) \

View File

@ -67,6 +67,7 @@ public:
public:
typedef WilsonFermion<Impl> WilsonBase;
virtual int ConstEE(void) { return 0; };
virtual void Instantiatable(void){};
// Constructors
WilsonCloverFermion(GaugeField &_Umu, GridCartesian &Fgrid,

View File

@ -939,6 +939,75 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const Fe
merge(qSiteRev, qSiteVec); \
}
// psi = chiralProjectPlus(Result_s[Ls/2-1]);
// psi+= chiralProjectMinus(Result_s[Ls/2]);
// PJ5q+=localInnerProduct(psi,psi);
// Site-by-site (1 + Gamma5) applied to a lattice field.
// Note that no factor 1/2 is applied, so the result is twice the usual
// positive-chirality projection of the input.
template<class vobj>
Lattice<vobj> spProj5p(const Lattice<vobj> & in)
{
  Gamma gamma5(Gamma::Algebra::Gamma5);
  GridBase *lattice = in._grid;
  Lattice<vobj> result(lattice);
  parallel_for(int site=0;site<lattice->oSites();site++){
    result._odata[site] = in._odata[site] + gamma5*in._odata[site];
  }
  return result;
}
// Site-by-site (1 - Gamma5) applied to a lattice field.
// Note that no factor 1/2 is applied, so the result is twice the usual
// negative-chirality projection of the input.
template<class vobj>
Lattice<vobj> spProj5m(const Lattice<vobj> & in)
{
  GridBase *lattice = in._grid;
  Gamma gamma5(Gamma::Algebra::Gamma5);
  Lattice<vobj> result(lattice);
  parallel_for(int site=0;site<lattice->oSites();site++){
    result._odata[site] = in._odata[site] - gamma5*in._odata[site];
  }
  return result;
}
// Midpoint contraction J5q for a 5d fermion field.
//
//   p   = P_+ q_in[Ls/2-1] + P_- q_in[Ls/2],   P_+- = (1 +- Gamma5)/2
//   J5q = localInnerProduct(p,p)
//
// q_in must live on FermionGrid() and J5q on GaugeGrid().
//
// Changes relative to the previous version:
//  * the plus projection is now taken from slice Ls/2-1 and the minus
//    projection from slice Ls/2 (they were swapped);
//  * spProj5p/spProj5m return (1 +- Gamma5)*in without the factor 1/2, so the
//    sum is scaled by 0.5 here to form true chiral projections;
//  * an unused 4d temporary was removed.
template <class Impl>
void WilsonFermion5D<Impl>::ContractJ5q(FermionField &q_in,ComplexField &J5q)
{
  conformable(GaugeGrid(), J5q._grid);
  conformable(q_in._grid, FermionGrid());

  // 4d fields
  int Ls = this->Ls;
  FermionField p_plus (GaugeGrid());
  FermionField p_minus(GaugeGrid());
  FermionField p(GaugeGrid());

  ExtractSlice(p_plus , q_in, Ls/2-1 , 0);
  ExtractSlice(p_minus, q_in, Ls/2   , 0);
  p_plus = spProj5p(p_plus );
  p_minus= spProj5m(p_minus);
  p=0.5*(p_plus+p_minus);
  J5q = localInnerProduct(p,p);
}
// Midpoint contraction J5q for a 5d propagator field.
//
//   p   = P_+ q_in[Ls/2-1] + P_- q_in[Ls/2],   P_+- = (1 +- Gamma5)/2
//   J5q = localInnerProduct(p,p)
//
// q_in must live on FermionGrid() and J5q on GaugeGrid().
//
// Changes relative to the previous version:
//  * the plus projection is now taken from slice Ls/2-1 and the minus
//    projection from slice Ls/2 (they were swapped);
//  * spProj5p/spProj5m return (1 +- Gamma5)*in without the factor 1/2, so the
//    sum is scaled by 0.5 here to form true chiral projections;
//  * an unused 4d temporary was removed.
template <class Impl>
void WilsonFermion5D<Impl>::ContractJ5q(PropagatorField &q_in,ComplexField &J5q)
{
  conformable(GaugeGrid(), J5q._grid);
  conformable(q_in._grid, FermionGrid());

  // 4d fields
  int Ls = this->Ls;
  PropagatorField p_plus (GaugeGrid());
  PropagatorField p_minus(GaugeGrid());
  PropagatorField p(GaugeGrid());

  ExtractSlice(p_plus , q_in, Ls/2-1 , 0);
  ExtractSlice(p_minus, q_in, Ls/2   , 0);
  p_plus = spProj5p(p_plus );
  p_minus= spProj5m(p_minus);
  p=0.5*(p_plus+p_minus);
  J5q = localInnerProduct(p,p);
}
template <class Impl>
void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
@ -949,6 +1018,7 @@ void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
conformable(q_in_1._grid, FermionGrid());
conformable(q_in_1._grid, q_in_2._grid);
conformable(_FourDimGrid, q_out._grid);
PropagatorField tmp1(FermionGrid()), tmp2(FermionGrid());
unsigned int LLs = q_in_1._grid->_rdimensions[0];
q_out = zero;
@ -995,7 +1065,6 @@ void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
}
template <class Impl>
void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,

View File

@ -230,6 +230,10 @@ namespace QCD {
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx);
void ContractJ5q(PropagatorField &q_in,ComplexField &J5q);
void ContractJ5q(FermionField &q_in,ComplexField &J5q);
};
}}

View File

@ -81,8 +81,8 @@ WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,Doubl
assert(0);
}
#include <qcd/action/fermion/WilsonKernelsAsmAvx512.h>
#include <qcd/action/fermion/WilsonKernelsAsmQPX.h>
#include <Grid/qcd/action/fermion/WilsonKernelsAsmAvx512.h>
#include <Grid/qcd/action/fermion/WilsonKernelsAsmQPX.h>
#define INSTANTIATE_ASM(A)\
template void WilsonKernels<A>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\

View File

@ -0,0 +1,155 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonTMFermion5D.h
Copyright (C) 2015
Author: paboyle <paboyle@ph.ed.ac.uk> ; NB Christoph did similar in GPT
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/WilsonFermion.h>
namespace Grid {
namespace QCD {
// Five-dimensional Wilson operator with a separate mass and twisted-mass
// parameter for every slice s of the fifth dimension.
//
// On slice s the site-diagonal term is
//     (4 + mass[s]) + i * mu[s] * Gamma5
// and the hopping term is inherited from WilsonFermion5D (constructed here
// with the fixed value 4.0 in its mass-like argument).
template<class Impl>
class WilsonTMFermion5D : public WilsonFermion5D<Impl>
{
public:
  INHERIT_IMPL_TYPES(Impl);
public:

  virtual void   Instantiatable(void) {};

  // Constructors
  // _mass and _mu must both have one entry per slice of the fifth dimension
  // (checked in update()). They are taken by const reference; the copies that
  // matter are made into the member vectors by update().
  WilsonTMFermion5D(GaugeField &_Umu,
                    GridCartesian         &Fgrid,
                    GridRedBlackCartesian &Frbgrid,
                    GridCartesian         &Ugrid,
                    GridRedBlackCartesian &Urbgrid,
                    const std::vector<RealD> &_mass,
                    const std::vector<RealD> &_mu,
                    const ImplParams &p= ImplParams()
                    ) :
    WilsonFermion5D<Impl>(_Umu,
                          Fgrid,
                          Frbgrid,
                          Ugrid,
                          Urbgrid,
                          4.0,p)
  {
    update(_mass,_mu);
  }

  // Checkerboard-changing hopping term.
  virtual void Meooe(const FermionField &in, FermionField &out) {
    if (in.checkerboard == Odd) {
      this->DhopEO(in, out, DaggerNo);
    } else {
      this->DhopOE(in, out, DaggerNo);
    }
  }

  // Adjoint of Meooe.
  virtual void MeooeDag(const FermionField &in, FermionField &out) {
    if (in.checkerboard == Odd) {
      this->DhopEO(in, out, DaggerYes);
    } else {
      this->DhopOE(in, out, DaggerYes);
    }
  }

  // allow override for twisted mass and clover
  // out[s] = (4 + mass[s]) in[s] + i mu[s] Gamma5 in[s]
  virtual void Mooee(const FermionField &in, FermionField &out) {
    out.checkerboard = in.checkerboard;
    //axpibg5x(out,in,a,b); // out = a*in + b*i*G5*in
    for (int s=0;s<(int)this->mass.size();s++) {
      ComplexD a = 4.0+this->mass[s];
      ComplexD b(0.0,this->mu[s]);
      axpbg5y_ssp(out,a,in,b,in,s,s);
    }
  }

  // out[s] = (4 + mass[s]) in[s] - i mu[s] Gamma5 in[s]
  virtual void MooeeDag(const FermionField &in, FermionField &out) {
    out.checkerboard = in.checkerboard;
    for (int s=0;s<(int)this->mass.size();s++) {
      ComplexD a = 4.0+this->mass[s];
      ComplexD b(0.0,-this->mu[s]);
      axpbg5y_ssp(out,a,in,b,in,s,s);
    }
  }

  // Inverse of Mooee: with mtil = 4 + mass[s],
  // out[s] = (mtil - i mu[s] Gamma5) in[s] / (mtil^2 + mu[s]^2)
  virtual void MooeeInv(const FermionField &in, FermionField &out) {
    out.checkerboard = in.checkerboard;   // kept explicit, as in Mooee/MooeeDag
    for (int s=0;s<(int)this->mass.size();s++) {
      RealD tm      = this->mu[s];
      RealD mtil    = 4.0+this->mass[s];
      RealD sq      = mtil*mtil+tm*tm;
      ComplexD a    = mtil/sq;
      ComplexD b(0.0, -tm /sq);
      axpbg5y_ssp(out,a,in,b,in,s,s);
    }
  }

  // Inverse of MooeeDag: with mtil = 4 + mass[s],
  // out[s] = (mtil + i mu[s] Gamma5) in[s] / (mtil^2 + mu[s]^2)
  virtual void MooeeInvDag(const FermionField &in, FermionField &out) {
    out.checkerboard = in.checkerboard;   // kept explicit, as in Mooee/MooeeDag
    for (int s=0;s<(int)this->mass.size();s++) {
      RealD tm      = this->mu[s];
      RealD mtil    = 4.0+this->mass[s];
      RealD sq      = mtil*mtil+tm*tm;
      ComplexD a    = mtil/sq;
      ComplexD b(0.0,tm /sq);
      axpbg5y_ssp(out,a,in,b,in,s,s);
    }
  }

  // Full operator: out = Dhop(in) + diagonal term; returns the value produced
  // by axpy_norm for the result.
  virtual RealD M(const FermionField &in, FermionField &out) {
    out.checkerboard = in.checkerboard;
    this->Dhop(in, out, DaggerNo);
    FermionField tmp(out._grid);
    for (int s=0;s<(int)this->mass.size();s++) {
      ComplexD a = 4.0+this->mass[s];
      ComplexD b(0.0,this->mu[s]);
      axpbg5y_ssp(tmp,a,in,b,in,s,s);
    }
    return axpy_norm(out, 1.0, tmp, out);
  }

  // needed for fast PV
  // Replace the per-slice parameters. Both vectors must have the same length,
  // equal to the extent of dimension 0 of the fermion grid.
  void update(const std::vector<RealD>& _mass, const std::vector<RealD>& _mu) {
    assert(_mass.size() == _mu.size());
    assert(_mass.size() == this->FermionGrid()->_fdimensions[0]);
    this->mass = _mass;
    this->mu = _mu;
  }

private:
  std::vector<RealD> mu;     // twisted-mass parameter per slice
  std::vector<RealD> mass;   // mass parameter per slice
};
typedef WilsonTMFermion5D<WilsonImplF> WilsonTMFermion5DF;
typedef WilsonTMFermion5D<WilsonImplD> WilsonTMFermion5DD;
}}

View File

@ -29,6 +29,14 @@ directory
#ifndef GRID_GAUGE_IMPL_TYPES_H
#define GRID_GAUGE_IMPL_TYPES_H
// Molecular-dynamics time convention switch for HMC momenta.
// CPS_MD_TIME is defined unconditionally here, so HMC_MOMENTUM_DENOMINATOR
// evaluates to 2.0; removing the define below makes it fall back to 1.0.
// generate_momenta() scales each momentum link by
// ::sqrt(HMC_MOMENTUM_DENOMINATOR).
#define CPS_MD_TIME
#ifdef CPS_MD_TIME
#define HMC_MOMENTUM_DENOMINATOR (2.0)
#else
#define HMC_MOMENTUM_DENOMINATOR (1.0)
#endif
namespace Grid {
namespace QCD {
@ -38,6 +46,7 @@ namespace QCD {
#define INHERIT_GIMPL_TYPES(GImpl) \
typedef typename GImpl::Simd Simd; \
typedef typename GImpl::Scalar Scalar; \
typedef typename GImpl::LinkField GaugeLinkField; \
typedef typename GImpl::Field GaugeField; \
typedef typename GImpl::ComplexField ComplexField;\
@ -55,7 +64,8 @@ namespace QCD {
template <class S, int Nrepresentation = Nc, int Nexp = 12 > class GaugeImplTypes {
public:
typedef S Simd;
typedef typename Simd::scalar_type scalar_type;
typedef scalar_type Scalar;
template <typename vtype> using iImplScalar = iScalar<iScalar<iScalar<vtype> > >;
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
@ -87,12 +97,32 @@ public:
///////////////////////////////////////////////////////////
// Move these to another class
// HMC auxiliary functions
static inline void generate_momenta(Field &P, GridParallelRNG &pRNG) {
// specific for SU gauge fields
static inline void generate_momenta(Field &P, GridParallelRNG &pRNG)
{
// Zbigniew Sroczynski thesis:
//
// P(p) = N \Prod_{x\mu}e^-{1/2 Tr (p^2_mux)}
//
// p_x,mu = c_x,mu,a T_a
//
// Tr p^2 = sum_a,x,mu 1/2 (c_x,mu,a)^2
//
// Which implies P(p) = N \Prod_{x,\mu,a} e^-{1/4 c_xmua^2 }
//
// = N \Prod_{x,\mu,a} e^-{1/2 (c_xmua/sqrt{2})^2 }
//
// Expect c' = cxmua/sqrt(2) to be a unit variance gaussian.
//
// Expect cxmua to have standard deviation sqrt(2), i.e. variance 2.
//
// Must scale the momentum by sqrt(2) to invoke CPS and UKQCD conventions
//
LinkField Pmu(P._grid);
Pmu = zero;
Pmu = Zero();
for (int mu = 0; mu < Nd; mu++) {
SU<Nrepresentation>::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu);
RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR) ;
Pmu = Pmu*scale;
PokeIndex<LorentzIndex>(P, Pmu, mu);
}
}

View File

@ -4,9 +4,11 @@
Source file: ./lib/qcd/action/gauge/Photon.h
Copyright (C) 2015
Copyright (C) 2015-2018
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Antonin Portelli <antonin.portelli@me.com>
Author: James Harrison <J.Harrison@soton.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -30,11 +32,13 @@
namespace Grid{
namespace QCD{
template <class S>
class QedGimpl
class QedGImpl
{
public:
typedef S Simd;
typedef typename Simd::scalar_type Scalar;
template <typename vtype>
using iImplGaugeLink = iScalar<iScalar<iScalar<vtype>>>;
@ -43,27 +47,27 @@ namespace QCD{
typedef iImplGaugeLink<Simd> SiteLink;
typedef iImplGaugeField<Simd> SiteField;
typedef SiteField SiteComplex;
typedef SiteLink SiteComplex;
typedef Lattice<SiteLink> LinkField;
typedef Lattice<SiteField> Field;
typedef Field ComplexField;
};
typedef QedGimpl<vComplex> QedGimplR;
typedef QedGImpl<vComplex> QedGImplR;
template<class Gimpl>
template <class GImpl>
class Photon
{
public:
INHERIT_GIMPL_TYPES(Gimpl);
INHERIT_GIMPL_TYPES(GImpl);
typedef typename SiteGaugeLink::scalar_object ScalarSite;
typedef typename ScalarSite::scalar_type ScalarComplex;
GRID_SERIALIZABLE_ENUM(Gauge, undef, feynman, 1, coulomb, 2, landau, 3);
GRID_SERIALIZABLE_ENUM(ZmScheme, undef, qedL, 1, qedTL, 2, qedInf, 3);
GRID_SERIALIZABLE_ENUM(ZmScheme, undef, qedL, 1, qedTL, 2);
public:
Photon(Gauge gauge, ZmScheme zmScheme);
Photon(Gauge gauge, ZmScheme zmScheme, std::vector<Real> improvements);
Photon(Gauge gauge, ZmScheme zmScheme, Real G0);
Photon(Gauge gauge, ZmScheme zmScheme, std::vector<Real> improvements, Real G0);
Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme, std::vector<Real> improvement);
Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme);
virtual ~Photon(void) = default;
void FreePropagator(const GaugeField &in, GaugeField &out);
void MomentumSpacePropagator(const GaugeField &in, GaugeField &out);
@ -73,345 +77,255 @@ namespace QCD{
const GaugeLinkField &weight);
void UnitField(GaugeField &out);
private:
void infVolPropagator(GaugeLinkField &out);
void invKHatSquared(GaugeLinkField &out);
void makeSpatialNorm(LatticeInteger &spNrm);
void makeKHat(std::vector<GaugeLinkField> &khat);
void makeInvKHatSquared(GaugeLinkField &out);
void zmSub(GaugeLinkField &out);
void transverseProjectSpatial(GaugeField &out);
void gaugeTransform(GaugeField &out);
private:
Gauge gauge_;
ZmScheme zmScheme_;
std::vector<Real> improvement_;
Real G0_;
GridBase *grid_;
Gauge gauge_;
ZmScheme zmScheme_;
std::vector<Real> improvement_;
};
typedef Photon<QedGimplR> PhotonR;
typedef Photon<QedGImplR> PhotonR;
template<class Gimpl>
Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme)
: gauge_(gauge), zmScheme_(zmScheme), improvement_(std::vector<Real>()),
G0_(0.15493339023106021408483720810737508876916113364521)
{}
template<class Gimpl>
Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme,
template<class GImpl>
Photon<GImpl>::Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme,
std::vector<Real> improvements)
: gauge_(gauge), zmScheme_(zmScheme), improvement_(improvements),
G0_(0.15493339023106021408483720810737508876916113364521)
: grid_(grid), gauge_(gauge), zmScheme_(zmScheme), improvement_(improvements)
{}
template<class Gimpl>
Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme, Real G0)
: gauge_(gauge), zmScheme_(zmScheme), improvement_(std::vector<Real>()), G0_(G0)
template<class GImpl>
Photon<GImpl>::Photon(GridBase *grid, Gauge gauge, ZmScheme zmScheme)
: Photon(grid, gauge, zmScheme, std::vector<Real>())
{}
template<class Gimpl>
Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme,
std::vector<Real> improvements, Real G0)
: gauge_(gauge), zmScheme_(zmScheme), improvement_(improvements), G0_(G0)
{}
template<class Gimpl>
void Photon<Gimpl>::FreePropagator (const GaugeField &in,GaugeField &out)
template<class GImpl>
void Photon<GImpl>::FreePropagator(const GaugeField &in, GaugeField &out)
{
FFT theFFT(in._grid);
FFT theFFT(dynamic_cast<GridCartesian *>(grid_));
GaugeField in_k(grid_);
GaugeField prop_k(grid_);
GaugeField in_k(in._grid);
GaugeField prop_k(in._grid);
theFFT.FFT_all_dim(in_k,in,FFT::forward);
MomentumSpacePropagator(prop_k,in_k);
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
theFFT.FFT_all_dim(in_k, in, FFT::forward);
MomentumSpacePropagator(prop_k, in_k);
theFFT.FFT_all_dim(out, prop_k, FFT::backward);
}
template<class Gimpl>
void Photon<Gimpl>::infVolPropagator(GaugeLinkField &out)
template<class GImpl>
void Photon<GImpl>::makeSpatialNorm(LatticeInteger &spNrm)
{
auto *grid = dynamic_cast<GridCartesian *>(out._grid);
LatticeReal xmu(grid);
GaugeLinkField one(grid);
const unsigned int nd = grid->_ndimension;
std::vector<int> &l = grid->_fdimensions;
std::vector<int> x0(nd,0);
TComplex Tone = Complex(1.0,0.0);
TComplex Tzero = Complex(G0_,0.0);
FFT fft(grid);
LatticeInteger coor(grid_);
std::vector<int> l = grid_->FullDimensions();
spNrm = zero;
for(int mu = 0; mu < grid_->Nd() - 1; mu++)
{
LatticeCoordinate(coor, mu);
coor = where(coor < Integer(l[mu]/2), coor, coor - Integer(l[mu]));
spNrm = spNrm + coor*coor;
}
}
// Fill khat with one link field per dimension of grid_.
// With n the lattice coordinate along mu and L = FullDimensions()[mu]:
//     khat[mu] = exp(i*pi*n/L) * 2 * sin(pi*n/L)
// khat is resized to Nd() entries, each constructed on grid_.
template<class GImpl>
void Photon<GImpl>::makeKHat(std::vector<GaugeLinkField> &khat)
{
const unsigned int nd = grid_->Nd();
std::vector<int> l = grid_->FullDimensions();
Complex ci(0., 1.);
khat.resize(nd, grid_);
for (unsigned int mu = 0; mu < nd; ++mu)
{
Real piL = M_PI/l[mu];
// khat[mu] first holds the coordinate n along mu, then is overwritten
LatticeCoordinate(khat[mu], mu);
khat[mu] = exp(piL*ci*khat[mu])*2.*sin(piL*khat[mu]);
}
}
template<class GImpl>
void Photon<GImpl>::makeInvKHatSquared(GaugeLinkField &out)
{
std::vector<GaugeLinkField> khat;
GaugeLinkField lone(grid_);
const unsigned int nd = grid_->Nd();
std::vector<int> zm(nd, 0);
ScalarSite one = ScalarComplex(1., 0.), z = ScalarComplex(0., 0.);
one = Complex(1.0,0.0);
out = zero;
makeKHat(khat);
for(int mu = 0; mu < nd; mu++)
{
LatticeCoordinate(xmu,mu);
Real lo2 = l[mu]/2.0;
xmu = where(xmu < lo2, xmu, xmu-double(l[mu]));
out = out + toComplex(4*M_PI*M_PI*xmu*xmu);
out = out + khat[mu]*conjugate(khat[mu]);
}
pokeSite(Tone, out, x0);
out = one/out;
pokeSite(Tzero, out, x0);
fft.FFT_all_dim(out, out, FFT::forward);
lone = ScalarComplex(1., 0.);
pokeSite(one, out, zm);
out = lone/out;
pokeSite(z, out, zm);
}
template<class Gimpl>
void Photon<Gimpl>::invKHatSquared(GaugeLinkField &out)
template<class GImpl>
void Photon<GImpl>::zmSub(GaugeLinkField &out)
{
GridBase *grid = out._grid;
GaugeLinkField kmu(grid), one(grid);
const unsigned int nd = grid->_ndimension;
std::vector<int> &l = grid->_fdimensions;
std::vector<int> zm(nd,0);
TComplex Tone = Complex(1.0,0.0);
TComplex Tzero= Complex(0.0,0.0);
one = Complex(1.0,0.0);
out = zero;
for(int mu = 0; mu < nd; mu++)
{
Real twoPiL = M_PI*2./l[mu];
LatticeCoordinate(kmu,mu);
kmu = 2.*sin(.5*twoPiL*kmu);
out = out + kmu*kmu;
}
pokeSite(Tone, out, zm);
out = one/out;
pokeSite(Tzero, out, zm);
}
template<class Gimpl>
void Photon<Gimpl>::zmSub(GaugeLinkField &out)
{
GridBase *grid = out._grid;
const unsigned int nd = grid->_ndimension;
std::vector<int> &l = grid->_fdimensions;
switch (zmScheme_)
{
case ZmScheme::qedTL:
{
std::vector<int> zm(nd,0);
TComplex Tzero = Complex(0.0,0.0);
pokeSite(Tzero, out, zm);
std::vector<int> zm(grid_->Nd(), 0);
ScalarSite z = ScalarComplex(0., 0.);
pokeSite(z, out, zm);
break;
}
case ZmScheme::qedL:
{
LatticeInteger spNrm(grid), coor(grid);
GaugeLinkField z(grid);
spNrm = zero;
for(int d = 0; d < grid->_ndimension - 1; d++)
{
LatticeCoordinate(coor,d);
coor = where(coor < Integer(l[d]/2), coor, coor-Integer(l[d]));
spNrm = spNrm + coor*coor;
}
out = where(spNrm == Integer(0), 0.*out, out);
LatticeInteger spNrm(grid_);
// IR improvement
makeSpatialNorm(spNrm);
out = where(spNrm == Integer(0), 0.*out, out);
for(int i = 0; i < improvement_.size(); i++)
{
Real f = sqrt(improvement_[i]+1);
out = where(spNrm == Integer(i+1), f*out, out);
Real f = sqrt(improvement_[i] + 1);
out = where(spNrm == Integer(i + 1), f*out, out);
}
break;
}
default:
assert(0);
break;
}
}
template<class Gimpl>
void Photon<Gimpl>::MomentumSpacePropagator(const GaugeField &in,
GaugeField &out)
template<class GImpl>
void Photon<GImpl>::transverseProjectSpatial(GaugeField &out)
{
GridBase *grid = out._grid;
LatticeComplex momProp(grid);
switch (zmScheme_)
const unsigned int nd = grid_->Nd();
GaugeLinkField invKHat(grid_), cst(grid_), spdiv(grid_);
LatticeInteger spNrm(grid_);
std::vector<GaugeLinkField> khat, a(nd, grid_), aProj(nd, grid_);
invKHat = zero;
makeSpatialNorm(spNrm);
makeKHat(khat);
for (unsigned int mu = 0; mu < nd; ++mu)
{
case ZmScheme::qedTL:
case ZmScheme::qedL:
a[mu] = peekLorentz(out, mu);
if (mu < nd - 1)
{
invKHatSquared(momProp);
zmSub(momProp);
break;
invKHat += khat[mu]*conjugate(khat[mu]);
}
case ZmScheme::qedInf:
{
infVolPropagator(momProp);
}
cst = ScalarComplex(1., 0.);
invKHat = where(spNrm == Integer(0), cst, invKHat);
invKHat = cst/invKHat;
cst = zero;
invKHat = where(spNrm == Integer(0), cst, invKHat);
spdiv = zero;
for (unsigned int nu = 0; nu < nd - 1; ++nu)
{
spdiv += conjugate(khat[nu])*a[nu];
}
spdiv *= invKHat;
for (unsigned int mu = 0; mu < nd; ++mu)
{
aProj[mu] = a[mu] - khat[mu]*spdiv;
pokeLorentz(out, aProj[mu], mu);
}
}
template<class GImpl>
void Photon<GImpl>::gaugeTransform(GaugeField &out)
{
switch (gauge_)
{
case Gauge::feynman:
break;
case Gauge::coulomb:
transverseProjectSpatial(out);
break;
case Gauge::landau:
assert(0);
break;
}
default:
assert(0);
break;
}
}
template<class GImpl>
void Photon<GImpl>::MomentumSpacePropagator(const GaugeField &in,
GaugeField &out)
{
LatticeComplex momProp(grid_);
makeInvKHatSquared(momProp);
zmSub(momProp);
out = in*momProp;
}
template<class Gimpl>
void Photon<Gimpl>::StochasticWeight(GaugeLinkField &weight)
template<class GImpl>
void Photon<GImpl>::StochasticWeight(GaugeLinkField &weight)
{
auto *grid = dynamic_cast<GridCartesian *>(weight._grid);
const unsigned int nd = grid->_ndimension;
std::vector<int> latt_size = grid->_fdimensions;
switch (zmScheme_)
const unsigned int nd = grid_->Nd();
std::vector<int> l = grid_->FullDimensions();
Integer vol = 1;
for(unsigned int mu = 0; mu < nd; mu++)
{
case ZmScheme::qedTL:
case ZmScheme::qedL:
{
Integer vol = 1;
for(int d = 0; d < nd; d++)
{
vol = vol * latt_size[d];
}
invKHatSquared(weight);
weight = sqrt(vol)*sqrt(weight);
zmSub(weight);
break;
}
case ZmScheme::qedInf:
{
infVolPropagator(weight);
weight = sqrt(real(weight));
break;
}
default:
break;
vol = vol*l[mu];
}
makeInvKHatSquared(weight);
weight = sqrt(vol)*sqrt(weight);
zmSub(weight);
}
template<class Gimpl>
void Photon<Gimpl>::StochasticField(GaugeField &out, GridParallelRNG &rng)
template<class GImpl>
void Photon<GImpl>::StochasticField(GaugeField &out, GridParallelRNG &rng)
{
auto *grid = dynamic_cast<GridCartesian *>(out._grid);
GaugeLinkField weight(grid);
GaugeLinkField weight(grid_);
StochasticWeight(weight);
StochasticField(out, rng, weight);
}
template<class Gimpl>
void Photon<Gimpl>::StochasticField(GaugeField &out, GridParallelRNG &rng,
template<class GImpl>
void Photon<GImpl>::StochasticField(GaugeField &out, GridParallelRNG &rng,
const GaugeLinkField &weight)
{
auto *grid = dynamic_cast<GridCartesian *>(out._grid);
const unsigned int nd = grid->_ndimension;
GaugeLinkField r(grid);
GaugeField aTilde(grid);
FFT fft(grid);
const unsigned int nd = grid_->Nd();
GaugeLinkField r(grid_);
GaugeField aTilde(grid_);
FFT fft(dynamic_cast<GridCartesian *>(grid_));
switch (zmScheme_)
for(unsigned int mu = 0; mu < nd; mu++)
{
case ZmScheme::qedTL:
case ZmScheme::qedL:
{
for(int mu = 0; mu < nd; mu++)
{
gaussian(rng, r);
r = weight*r;
pokeLorentz(aTilde, r, mu);
}
break;
}
case ZmScheme::qedInf:
{
Complex shift(1., 1.); // This needs to be a GaugeLink element?
for(int mu = 0; mu < nd; mu++)
{
bernoulli(rng, r);
r = weight*(2.*r - shift);
pokeLorentz(aTilde, r, mu);
}
break;
}
default:
break;
gaussian(rng, r);
r = weight*r;
pokeLorentz(aTilde, r, mu);
}
gaugeTransform(aTilde);
fft.FFT_all_dim(out, aTilde, FFT::backward);
out = real(out);
}
template<class Gimpl>
void Photon<Gimpl>::UnitField(GaugeField &out)
template<class GImpl>
void Photon<GImpl>::UnitField(GaugeField &out)
{
auto *grid = dynamic_cast<GridCartesian *>(out._grid);
const unsigned int nd = grid->_ndimension;
GaugeLinkField r(grid);
const unsigned int nd = grid_->Nd();
GaugeLinkField r(grid_);
r = Complex(1.0,0.0);
for(int mu = 0; mu < nd; mu++)
r = ScalarComplex(1., 0.);
for(unsigned int mu = 0; mu < nd; mu++)
{
pokeLorentz(out, r, mu);
}
out = real(out);
}
// template<class Gimpl>
// void Photon<Gimpl>::FeynmanGaugeMomentumSpacePropagator_L(GaugeField &out,
// const GaugeField &in)
// {
//
// FeynmanGaugeMomentumSpacePropagator_TL(out,in);
//
// GridBase *grid = out._grid;
// LatticeInteger coor(grid);
// GaugeField zz(grid); zz=zero;
//
// // xyzt
// for(int d = 0; d < grid->_ndimension-1;d++){
// LatticeCoordinate(coor,d);
// out = where(coor==Integer(0),zz,out);
// }
// }
//
// template<class Gimpl>
// void Photon<Gimpl>::FeynmanGaugeMomentumSpacePropagator_TL(GaugeField &out,
// const GaugeField &in)
// {
//
// // what type LatticeComplex
// GridBase *grid = out._grid;
// int nd = grid->_ndimension;
//
// typedef typename GaugeField::vector_type vector_type;
// typedef typename GaugeField::scalar_type ScalComplex;
// typedef Lattice<iSinglet<vector_type> > LatComplex;
//
// std::vector<int> latt_size = grid->_fdimensions;
//
// LatComplex denom(grid); denom= zero;
// LatComplex one(grid); one = ScalComplex(1.0,0.0);
// LatComplex kmu(grid);
//
// ScalComplex ci(0.0,1.0);
// // momphase = n * 2pi / L
// for(int mu=0;mu<Nd;mu++) {
//
// LatticeCoordinate(kmu,mu);
//
// RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
//
// kmu = TwoPiL * kmu ;
//
// denom = denom + 4.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term
// }
// std::vector<int> zero_mode(nd,0);
// TComplexD Tone = ComplexD(1.0,0.0);
// TComplexD Tzero= ComplexD(0.0,0.0);
//
// pokeSite(Tone,denom,zero_mode);
//
// denom= one/denom;
//
// pokeSite(Tzero,denom,zero_mode);
//
// out = zero;
// out = in*denom;
// };
}}
#endif

View File

@ -75,7 +75,7 @@ namespace Grid{
virtual void deriv(const GaugeField &Umu,GaugeField & dSdU) {
//extend Ta to include Lorentz indexes
RealD factor_p = c_plaq/RealD(Nc)*0.5;
RealD factor_r = c_rect/RealD(Nc)*0.5;
RealD factor_r = c_rect/RealD(Nc)*0.5;
GridBase *grid = Umu._grid;

View File

@ -0,0 +1,53 @@
#pragma once
namespace Grid{
namespace QCD{
// Eigenvalue bound check at the high end.
// Runs PowerMethod on HermOp with Phi, logs the value it returns next to the
// bound `hi`, and asserts that the value is strictly below `hi`.
//   HermOp : operator handed to the power method
//   Phi    : field handed to the power method
//   hi     : upper bound the returned value is compared against
// The check itself is an assert(), so it disappears when NDEBUG is defined.
template<class Field>
void HighBoundCheck(LinearOperatorBase<Field> &HermOp,
                    Field &Phi,
                    RealD hi)
{
  PowerMethod<Field> estimator;
  auto lambda_max = estimator(HermOp,Phi);

  std::cout << GridLogMessage
            << "Pseudofermion action lamda_max " << lambda_max
            << "( bound " << hi << ")"
            << std::endl;

  assert( (lambda_max < hi) && " High Bounds Check on operator failed" );
}
// Round-trip check of a multishift solve built from PowerNegHalf.
// The solver is applied twice to a copy of GaussNoise, HermOp is applied to
// the result, and the difference from the original noise must satisfy
// sqrt(|difference|^2 / |noise|^2) < tol.
//   MaxIter      : iteration limit passed to ConjugateGradientMultiShift
//   tol          : bound on the relative difference
//   HermOp       : operator used by the solver and applied via HermOp.HermOp()
//   GaussNoise   : input vector; it is copied and not modified here
//   PowerNegHalf : multishift function passed to the solver
// The four norms are printed with plain std::cout. The final check is an
// assert(), so it disappears when NDEBUG is defined.
template<class Field> void InverseSqrtBoundsCheck(int MaxIter,double tol,
                                                  LinearOperatorBase<Field> &HermOp,
                                                  Field &GaussNoise,
                                                  MultiShiftFunction &PowerNegHalf)
{
  GridBase *FermionGrid = GaussNoise._grid;

  // Working copy of the noise and its norm.
  Field X(FermionGrid);
  X = GaussNoise;
  RealD Nx = norm2(X);

  ConjugateGradientMultiShift<Field> msCG(MaxIter,PowerNegHalf);

  // Two successive solves: X -> Y -> Z.
  Field Y(FermionGrid);
  Field Z(FermionGrid);
  msCG(HermOp,X,Y);
  msCG(HermOp,Y,Z);
  RealD Nz = norm2(Z);

  // Y is reused to hold HermOp applied to Z.
  HermOp.HermOp(Z,Y);
  RealD Ny = norm2(Y);

  // Difference between the original noise and the round trip.
  X = X - Y;
  RealD Nd = norm2(X);

  const char *banner = "************************* ";
  std::cout << banner << std::endl;
  std::cout << " noise = " << Nx << std::endl;
  std::cout << " (MdagM^-1/2)^2 noise = " << Nz << std::endl;
  std::cout << " MdagM (MdagM^-1/2)^2 noise = " << Ny << std::endl;
  std::cout << " noise - MdagM (MdagM^-1/2)^2 noise = " << Nd << std::endl;
  std::cout << banner << std::endl;

  assert( (std::sqrt(Nd/Nx)<tol) && " InverseSqrtBoundsCheck ");
}
}
}

View File

@ -58,13 +58,30 @@ namespace QCD{
bool use_heatbath_forecasting;
AbstractEOFAFermion<Impl>& Lop; // the basic LH operator
AbstractEOFAFermion<Impl>& Rop; // the basic RH operator
SchurRedBlackDiagMooeeSolve<FermionField> Solver;
SchurRedBlackDiagMooeeSolve<FermionField> SolverHB;
SchurRedBlackDiagMooeeSolve<FermionField> SolverL;
SchurRedBlackDiagMooeeSolve<FermionField> SolverR;
SchurRedBlackDiagMooeeSolve<FermionField> DerivativeSolverL;
SchurRedBlackDiagMooeeSolve<FermionField> DerivativeSolverR;
FermionField Phi; // the pseudofermion field for this trajectory
public:
ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop, AbstractEOFAFermion<Impl>& _Rop,
OperatorFunction<FermionField>& S, Params& p, bool use_fc=false) : Lop(_Lop), Rop(_Rop), Solver(S),
Phi(_Lop.FermionGrid()), param(p), use_heatbath_forecasting(use_fc)
ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop,
AbstractEOFAFermion<Impl>& _Rop,
OperatorFunction<FermionField>& HeatbathCG,
OperatorFunction<FermionField>& ActionCGL, OperatorFunction<FermionField>& ActionCGR,
OperatorFunction<FermionField>& DerivCGL , OperatorFunction<FermionField>& DerivCGR,
Params& p,
bool use_fc=false) :
Lop(_Lop),
Rop(_Rop),
SolverHB(HeatbathCG,false,true),
SolverL(ActionCGL, false, true), SolverR(ActionCGR, false, true),
DerivativeSolverL(DerivCGL, false, true), DerivativeSolverR(DerivCGR, false, true),
Phi(_Lop.FermionGrid()),
param(p),
use_heatbath_forecasting(use_fc)
{
AlgRemez remez(param.lo, param.hi, param.precision);
@ -98,6 +115,9 @@ namespace QCD{
// We generate a Gaussian noise vector \eta, and then compute
// \Phi = M_{\rm EOFA}^{-1/2} * \eta
// using a rational approximation to the inverse square root
//
// As a check of rational require \Phi^dag M_{EOFA} \Phi == eta^dag M^-1/2^dag M M^-1/2 eta = eta^dag eta
//
virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG)
{
Lop.ImportGauge(U);
@ -118,7 +138,6 @@ namespace QCD{
RealD scale = std::sqrt(0.5);
gaussian(pRNG,eta);
eta = eta * scale;
printf("Heatbath source vector: <\\eta|\\eta> = %1.15e\n", norm2(eta));
// \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta
RealD N(PowerNegHalf.norm);
@ -139,11 +158,11 @@ namespace QCD{
if(use_heatbath_forecasting){ // Forecast CG guess using solutions from previous poles
Lop.Mdag(CG_src, Forecast_src);
CG_soln = Forecast(Lop, Forecast_src, prev_solns);
Solver(Lop, CG_src, CG_soln);
SolverHB(Lop, CG_src, CG_soln);
prev_solns.push_back(CG_soln);
} else {
CG_soln = zero; // Just use zero as the initial guess
Solver(Lop, CG_src, CG_soln);
SolverHB(Lop, CG_src, CG_soln);
}
Lop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
tmp[1] = tmp[1] + ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Lop.k ) * tmp[0];
@ -166,11 +185,11 @@ namespace QCD{
if(use_heatbath_forecasting){
Rop.Mdag(CG_src, Forecast_src);
CG_soln = Forecast(Rop, Forecast_src, prev_solns);
Solver(Rop, CG_src, CG_soln);
SolverHB(Rop, CG_src, CG_soln);
prev_solns.push_back(CG_soln);
} else {
CG_soln = zero;
Solver(Rop, CG_src, CG_soln);
SolverHB(Rop, CG_src, CG_soln);
}
Rop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
tmp[1] = tmp[1] - ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Rop.k ) * tmp[0];
@ -182,8 +201,47 @@ namespace QCD{
// Reset shift coefficients for energy and force evals
Lop.RefreshShiftCoefficients(0.0);
Rop.RefreshShiftCoefficients(-1.0);
// Bounds check
RealD EtaDagEta = norm2(eta);
// RealD PhiDagMPhi= norm2(eta);
};
// Presumably meant to apply the EOFA operator to phi and return it in Mphi
// (name-based guess -- confirm with the author).
// NOTE(review): the entire body is compiled out with #if 0, so this function
// currently does nothing and leaves Mphi untouched.
// NOTE(review): the disabled draft is not ready to enable as-is: it projects
// the member Phi instead of the argument phi, accumulates into `action`,
// which is not declared in this function, and never assigns Mphi.
void Meofa(const GaugeField& U,const FermionField &phi, FermionField & Mphi)
{
#if 0
Lop.ImportGauge(U);
Rop.ImportGauge(U);
FermionField spProj_Phi(Lop.FermionGrid());
FermionField mPhi(Lop.FermionGrid());
std::vector<FermionField> tmp(2, Lop.FermionGrid());
mPhi = phi;
// LH term: S = S - k <\Phi| P_{-} \Omega_{-}^{\dagger} H(mf)^{-1} \Omega_{-} P_{-} |\Phi>
spProj(Phi, spProj_Phi, -1, Lop.Ls);
Lop.Omega(spProj_Phi, tmp[0], -1, 0);
G5R5(tmp[1], tmp[0]);
tmp[0] = zero;
SolverL(Lop, tmp[1], tmp[0]);
Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back
Lop.Omega(tmp[1], tmp[0], -1, 1);
mPhi = mPhi - Lop.k * innerProduct(spProj_Phi, tmp[0]).real();
// RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb)
// - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{-} P_{-} |\Phi>
spProj(Phi, spProj_Phi, 1, Rop.Ls);
Rop.Omega(spProj_Phi, tmp[0], 1, 0);
G5R5(tmp[1], tmp[0]);
tmp[0] = zero;
SolverR(Rop, tmp[1], tmp[0]);
Rop.Dtilde(tmp[0], tmp[1]);
Rop.Omega(tmp[1], tmp[0], 1, 1);
action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real();
#endif
}
// EOFA action: see Eqn. (10) of arXiv:1706.05843
virtual RealD S(const GaugeField& U)
{
@ -201,7 +259,7 @@ namespace QCD{
Lop.Omega(spProj_Phi, tmp[0], -1, 0);
G5R5(tmp[1], tmp[0]);
tmp[0] = zero;
Solver(Lop, tmp[1], tmp[0]);
SolverL(Lop, tmp[1], tmp[0]);
Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back
Lop.Omega(tmp[1], tmp[0], -1, 1);
action -= Lop.k * innerProduct(spProj_Phi, tmp[0]).real();
@ -212,7 +270,7 @@ namespace QCD{
Rop.Omega(spProj_Phi, tmp[0], 1, 0);
G5R5(tmp[1], tmp[0]);
tmp[0] = zero;
Solver(Rop, tmp[1], tmp[0]);
SolverR(Rop, tmp[1], tmp[0]);
Rop.Dtilde(tmp[0], tmp[1]);
Rop.Omega(tmp[1], tmp[0], 1, 1);
action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real();
@ -234,17 +292,22 @@ namespace QCD{
GaugeField force(Lop.GaugeGrid());
/////////////////////////////////////////////
// PAB:
// Optional single precision derivative ?
/////////////////////////////////////////////
// LH: dSdU = k \chi_{L}^{\dagger} \gamma_{5} R_{5} ( \partial_{x,\mu} D_{w} ) \chi_{L}
// \chi_{L} = H(mf)^{-1} \Omega_{-} P_{-} \Phi
spProj(Phi, spProj_Phi, -1, Lop.Ls);
Lop.Omega(spProj_Phi, Omega_spProj_Phi, -1, 0);
G5R5(CG_src, Omega_spProj_Phi);
spProj_Phi = zero;
Solver(Lop, CG_src, spProj_Phi);
DerivativeSolverL(Lop, CG_src, spProj_Phi);
Lop.Dtilde(spProj_Phi, Chi);
G5R5(g5_R5_Chi, Chi);
Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo);
dSdU = Lop.k * force;
dSdU = -Lop.k * force;
// RH: dSdU = dSdU - k \chi_{R}^{\dagger} \gamma_{5} R_{5} ( \partial_{x,\mu} D_{w} ) \chi_{}
// \chi_{R} = ( H(mb) - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} \Phi
@ -252,11 +315,11 @@ namespace QCD{
Rop.Omega(spProj_Phi, Omega_spProj_Phi, 1, 0);
G5R5(CG_src, Omega_spProj_Phi);
spProj_Phi = zero;
Solver(Rop, CG_src, spProj_Phi);
DerivativeSolverR(Rop, CG_src, spProj_Phi);
Rop.Dtilde(spProj_Phi, Chi);
G5R5(g5_R5_Chi, Chi);
Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo);
dSdU = dSdU - Rop.k * force;
dSdU = dSdU + Rop.k * force;
};
};
}}

View File

@ -157,6 +157,13 @@ class OneFlavourEvenOddRationalPseudoFermionAction
msCG(Mpc, PhiOdd, Y);
if ( (rand()%param.BoundsCheckFreq)==0 ) {
FermionField gauss(FermOp.FermionRedBlackGrid());
gauss = PhiOdd;
HighBoundCheck(Mpc,gauss,param.hi);
InverseSqrtBoundsCheck(param.MaxIter,param.tolerance*100,Mpc,gauss,PowerNegHalf);
}
RealD action = norm2(Y);
std::cout << GridLogMessage << "Pseudofermion action FIXME -- is -1/4 "
"solve or -1/2 solve faster??? "

View File

@ -170,6 +170,14 @@ namespace Grid{
ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,PowerNegQuarter);
msCG_M(MdagM,X,Y);
// Randomly apply rational bounds checks.
if ( (rand()%param.BoundsCheckFreq)==0 ) {
FermionField gauss(NumOp.FermionRedBlackGrid());
gauss = PhiOdd;
HighBoundCheck(MdagM,gauss,param.hi);
InverseSqrtBoundsCheck(param.MaxIter,param.tolerance*100,MdagM,gauss,PowerNegHalf);
}
// Phidag VdagV^1/4 MdagM^-1/4 MdagM^-1/4 VdagV^1/4 Phi
RealD action = norm2(Y);

View File

@ -143,6 +143,14 @@ namespace Grid{
msCG(MdagMOp,Phi,Y);
if ( (rand()%param.BoundsCheckFreq)==0 ) {
FermionField gauss(FermOp.FermionGrid());
gauss = Phi;
HighBoundCheck(MdagMOp,gauss,param.hi);
InverseSqrtBoundsCheck(param.MaxIter,param.tolerance*100,MdagMOp,gauss,PowerNegHalf);
}
RealD action = norm2(Y);
std::cout << GridLogMessage << "Pseudofermion action FIXME -- is -1/4 solve or -1/2 solve faster??? "<<action<<std::endl;
return action;

View File

@ -156,6 +156,14 @@ namespace Grid{
ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,PowerNegQuarter);
msCG_M(MdagM,X,Y);
// Randomly apply rational bounds checks.
if ( (rand()%param.BoundsCheckFreq)==0 ) {
FermionField gauss(NumOp.FermionGrid());
gauss = Phi;
HighBoundCheck(MdagM,gauss,param.hi);
InverseSqrtBoundsCheck(param.MaxIter,param.tolerance*100,MdagM,gauss,PowerNegHalf);
}
// Phidag VdagV^1/4 MdagM^-1/4 MdagM^-1/4 VdagV^1/4 Phi
RealD action = norm2(Y);

View File

@ -29,6 +29,9 @@ directory
#ifndef QCD_PSEUDOFERMION_AGGREGATE_H
#define QCD_PSEUDOFERMION_AGGREGATE_H
// Rational functions
#include <Grid/qcd/action/pseudofermion/Bounds.h>
#include <Grid/qcd/action/pseudofermion/EvenOddSchurDifferentiable.h>
#include <Grid/qcd/action/pseudofermion/TwoFlavour.h>
#include <Grid/qcd/action/pseudofermion/TwoFlavourRatio.h>

View File

@ -85,21 +85,20 @@ class TwoFlavourPseudoFermionAction : public Action<typename Impl::GaugeField> {
// and must multiply by 0.707....
//
// Chroma has this scale factor: two_flavor_monomial_w.h
// CPS uses this factor
// IroIro: does not use this scale. It is absorbed by a change of vars
// in the Phi integral, and thus is only an irrelevant prefactor for
// the partition function.
//
RealD scale = std::sqrt(0.5);
const RealD scale = std::sqrt(0.5);
FermionField eta(FermOp.FermionGrid());
gaussian(pRNG, eta);
gaussian(pRNG, eta); eta = scale *eta;
FermOp.ImportGauge(U);
FermOp.Mdag(eta, Phi);
Phi = Phi * scale;
};
//////////////////////////////////////////////////////

View File

@ -46,6 +46,7 @@ namespace Grid{
OperatorFunction<FermionField> &DerivativeSolver;
OperatorFunction<FermionField> &ActionSolver;
OperatorFunction<FermionField> &HeatbathSolver;
FermionField PhiOdd; // the pseudo fermion field for this trajectory
FermionField PhiEven; // the pseudo fermion field for this trajectory
@ -54,11 +55,18 @@ namespace Grid{
TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator<Impl> &_NumOp,
FermionOperator<Impl> &_DenOp,
OperatorFunction<FermionField> & DS,
OperatorFunction<FermionField> & AS) :
OperatorFunction<FermionField> & AS ) :
TwoFlavourEvenOddRatioPseudoFermionAction(_NumOp,_DenOp, DS,AS,AS) {};
TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator<Impl> &_NumOp,
FermionOperator<Impl> &_DenOp,
OperatorFunction<FermionField> & DS,
OperatorFunction<FermionField> & AS, OperatorFunction<FermionField> & HS) :
NumOp(_NumOp),
DenOp(_DenOp),
DerivativeSolver(DS),
ActionSolver(AS),
HeatbathSolver(HS),
PhiEven(_NumOp.FermionRedBlackGrid()),
PhiOdd(_NumOp.FermionRedBlackGrid())
{
@ -111,7 +119,7 @@ namespace Grid{
// Odd det factors
Mpc.MpcDag(etaOdd,PhiOdd);
tmp=zero;
ActionSolver(Vpc,PhiOdd,tmp);
HeatbathSolver(Vpc,PhiOdd,tmp);
Vpc.Mpc(tmp,PhiOdd);
// Even det factors

View File

@ -54,8 +54,8 @@ public:
template <class ReaderClass, typename std::enable_if<isReader<ReaderClass>::value, int >::type = 0 >
IntegratorParameters(ReaderClass & Reader){
std::cout << "Reading integrator\n";
read(Reader, "Integrator", *this);
std::cout << GridLogMessage << "Reading integrator\n";
read(Reader, "Integrator", *this);
}
void print_parameters() const {
@ -88,8 +88,7 @@ class Integrator {
t_P[level] += ep;
update_P(P, U, level, ep);
std::cout << GridLogIntegrator << "[" << level << "] P "
<< " dt " << ep << " : t_P " << t_P[level] << std::endl;
std::cout << GridLogIntegrator << "[" << level << "] P " << " dt " << ep << " : t_P " << t_P[level] << std::endl;
}
// to be used by the actionlevel class to iterate
@ -105,7 +104,7 @@ class Integrator {
GF force = Rep.RtoFundamentalProject(forceR); // Ta for the fundamental rep
Real force_abs = std::sqrt(norm2(force)/(U._grid->gSites()));
std::cout << GridLogIntegrator << "Hirep Force average: " << force_abs << std::endl;
Mom -= force * ep ;
Mom -= force * ep* HMC_MOMENTUM_DENOMINATOR;;
}
}
} update_P_hireps{};
@ -129,11 +128,11 @@ class Integrator {
double end_force = usecond();
Real force_abs = std::sqrt(norm2(force)/U._grid->gSites());
std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] Force average: " << force_abs << std::endl;
Mom -= force * ep;
Mom -= force * ep* HMC_MOMENTUM_DENOMINATOR;;
double end_full = usecond();
double time_full = (end_full - start_full) / 1e3;
double time_force = (end_force - start_force) / 1e3;
std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)" << std::endl;
std::cout << GridLogMessage << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)" << std::endl;
}
// Force from the other representations
@ -238,8 +237,7 @@ class Integrator {
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
// get gauge field from the SmearingPolicy and
// based on the boolean is_smeared in actionID
Field& Us =
Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
as[level].actions.at(actionID)->refresh(Us, pRNG);
}
@ -252,13 +250,11 @@ class Integrator {
// over the representations
struct _S {
template <class FieldType, class Repr>
void operator()(std::vector<Action<FieldType>*> repr_set, Repr& Rep,
int level, RealD& H) {
void operator()(std::vector<Action<FieldType>*> repr_set, Repr& Rep, int level, RealD& H) {
for (int a = 0; a < repr_set.size(); ++a) {
RealD Hterm = repr_set.at(a)->S(Rep.U);
std::cout << GridLogMessage << "S Level " << level << " term " << a
<< " H Hirep = " << Hterm << std::endl;
std::cout << GridLogMessage << "S Level " << level << " term " << a << " H Hirep = " << Hterm << std::endl;
H += Hterm;
}
@ -268,20 +264,21 @@ class Integrator {
// Calculate action
RealD S(Field& U) { // here also U not used
RealD H = - FieldImplementation::FieldSquareNorm(P); // - trace (P*P)
std::cout << GridLogIntegrator << "Integrator action\n";
RealD H = - FieldImplementation::FieldSquareNorm(P)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom
RealD Hterm;
std::cout << GridLogMessage << "Momentum action H_p = " << H << "\n";
// Actions
for (int level = 0; level < as.size(); ++level) {
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
// get gauge field from the SmearingPolicy and
// based on the boolean is_smeared in actionID
Field& Us =
Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
Hterm = as[level].actions.at(actionID)->S(Us);
std::cout << GridLogMessage << "S Level " << level << " term "
<< actionID << " H = " << Hterm << std::endl;
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
H += Hterm;
}
as[level].apply(S_hireps, Representations, level, H);
@ -306,8 +303,7 @@ class Integrator {
// Check the clocks all match on all levels
for (int level = 0; level < as.size(); ++level) {
assert(fabs(t_U - t_P[level]) < 1.0e-6); // must be the same
std::cout << GridLogIntegrator << " times[" << level
<< "]= " << t_P[level] << " " << t_U << std::endl;
std::cout << GridLogIntegrator << " times[" << level << "]= " << t_P[level] << " " << t_U << std::endl;
}
// and that we indeed got to the end of the trajectory

View File

@ -231,8 +231,7 @@ class ForceGradient : public Integrator<FieldImplementation, SmearingPolicy,
Field Pfg(U._grid);
Ufg = U;
Pfg = zero;
std::cout << GridLogIntegrator << "FG update " << fg_dt << " " << ep
<< std::endl;
std::cout << GridLogIntegrator << "FG update " << fg_dt << " " << ep << std::endl;
// prepare_fg; no prediction/result cache for now
// could relax CG stopping conditions for the
// derivatives in the small step since the force gets multiplied by
@ -271,8 +270,7 @@ class ForceGradient : public Integrator<FieldImplementation, SmearingPolicy,
this->step(U, level + 1, first_step, 0);
}
this->FG_update_P(U, level, 2 * Chi / ((1.0 - 2.0 * lambda) * eps),
(1.0 - 2.0 * lambda) * eps);
this->FG_update_P(U, level, 2 * Chi / ((1.0 - 2.0 * lambda) * eps), (1.0 - 2.0 * lambda) * eps);
if (level == fl) { // lowest level
this->update_U(U, 0.5 * eps);

View File

@ -11,6 +11,24 @@ const std::array<const Gamma, 4> Gamma::gmu = {{
Gamma(Gamma::Algebra::GammaZ),
Gamma(Gamma::Algebra::GammaT)}};
// Table of 16 Gamma objects, in this order: Identity, Gamma5, the four
// GammaX/Y/Z/T, the four Gamma{X,Y,Z,T}Gamma5 products, and the six
// Sigma{XT,XY,XZ,YT,YZ,ZT} entries.
// The size 16 is hard-coded here and must match the declaration in the class.
// NOTE(review): this file is presumably emitted by the gamma-gen.nb notebook;
// if so, make changes in the notebook and regenerate -- confirm.
const std::array<const Gamma, 16> Gamma::gall = {{
Gamma(Gamma::Algebra::Identity),
Gamma(Gamma::Algebra::Gamma5),
Gamma(Gamma::Algebra::GammaX),
Gamma(Gamma::Algebra::GammaY),
Gamma(Gamma::Algebra::GammaZ),
Gamma(Gamma::Algebra::GammaT),
Gamma(Gamma::Algebra::GammaXGamma5),
Gamma(Gamma::Algebra::GammaYGamma5),
Gamma(Gamma::Algebra::GammaZGamma5),
Gamma(Gamma::Algebra::GammaTGamma5),
Gamma(Gamma::Algebra::SigmaXT),
Gamma(Gamma::Algebra::SigmaXY),
Gamma(Gamma::Algebra::SigmaXZ),
Gamma(Gamma::Algebra::SigmaYT),
Gamma(Gamma::Algebra::SigmaYZ),
Gamma(Gamma::Algebra::SigmaZT)}};
const std::array<const char *, Gamma::nGamma> Gamma::name = {{
"-Gamma5 ",
"Gamma5 ",

View File

@ -48,6 +48,7 @@ class Gamma {
static const std::array<std::array<Algebra, nGamma>, nGamma> mul;
static const std::array<Algebra, nGamma> adj;
static const std::array<const Gamma, 4> gmu;
static const std::array<const Gamma, 16> gall;
Algebra g;
public:
Gamma(Algebra initg): g(initg) {}

View File

@ -10,10 +10,10 @@
NotebookFileLineBreakTest
NotebookFileLineBreakTest
NotebookDataPosition[ 158, 7]
NotebookDataLength[ 75090, 1956]
NotebookOptionsPosition[ 69536, 1867]
NotebookOutlinePosition[ 69898, 1883]
CellTagsIndexPosition[ 69855, 1880]
NotebookDataLength[ 67118, 1714]
NotebookOptionsPosition[ 63485, 1652]
NotebookOutlinePosition[ 63842, 1668]
CellTagsIndexPosition[ 63799, 1665]
WindowFrame->Normal*)
(* Beginning of Notebook Content *)
@ -76,234 +76,6 @@ Cell[BoxData["\<\"/Users/antonin/Development/Grid/lib/qcd/spin/gamma-gen\"\>"]\
Cell[CellGroupData[{
Cell[BoxData[
RowBox[{"FactorInteger", "[", "3152", "]"}]], "Input",
CellChangeTimes->{{3.7432347536316767`*^9, 3.7432347764739027`*^9}, {
3.743234833567358*^9,
3.743234862146022*^9}},ExpressionUUID->"d1a0fd03-85e1-43af-ba80-\
3ca4235675d8"],
Cell[BoxData[
RowBox[{"{",
RowBox[{
RowBox[{"{",
RowBox[{"2", ",", "4"}], "}"}], ",",
RowBox[{"{",
RowBox[{"197", ",", "1"}], "}"}]}], "}"}]], "Output",
CellChangeTimes->{{3.743234836792224*^9,
3.743234862493619*^9}},ExpressionUUID->"16d3f953-4b24-4ed2-ae62-\
306dcab66ca7"]
}, Open ]],
Cell[CellGroupData[{
Cell[BoxData[
RowBox[{"sol", "=",
RowBox[{"Solve", "[",
RowBox[{
RowBox[{
RowBox[{
SuperscriptBox["x", "2"], "+",
SuperscriptBox["y", "2"], "+",
SuperscriptBox["z", "2"]}], "\[Equal]", "2"}], ",",
RowBox[{"{",
RowBox[{"x", ",", "y", ",", "z"}], "}"}], ",", "Integers"}],
"]"}]}]], "Input",
CellChangeTimes->{{3.743235304127721*^9,
3.7432353087929983`*^9}},ExpressionUUID->"f0fa2a5c-3d81-4d75-a447-\
50c7ca3459ff"],
Cell[BoxData[
RowBox[{"{",
RowBox[{
RowBox[{"{",
RowBox[{
RowBox[{"x", "\[Rule]",
RowBox[{"-", "1"}]}], ",",
RowBox[{"y", "\[Rule]",
RowBox[{"-", "1"}]}], ",",
RowBox[{"z", "\[Rule]", "0"}]}], "}"}], ",",
RowBox[{"{",
RowBox[{
RowBox[{"x", "\[Rule]",
RowBox[{"-", "1"}]}], ",",
RowBox[{"y", "\[Rule]", "0"}], ",",
RowBox[{"z", "\[Rule]",
RowBox[{"-", "1"}]}]}], "}"}], ",",
RowBox[{"{",
RowBox[{
RowBox[{"x", "\[Rule]",
RowBox[{"-", "1"}]}], ",",
RowBox[{"y", "\[Rule]", "0"}], ",",
RowBox[{"z", "\[Rule]", "1"}]}], "}"}], ",",
RowBox[{"{",
RowBox[{
RowBox[{"x", "\[Rule]",
RowBox[{"-", "1"}]}], ",",
RowBox[{"y", "\[Rule]", "1"}], ",",
RowBox[{"z", "\[Rule]", "0"}]}], "}"}], ",",
RowBox[{"{",
RowBox[{
RowBox[{"x", "\[Rule]", "0"}], ",",
RowBox[{"y", "\[Rule]",
RowBox[{"-", "1"}]}], ",",
RowBox[{"z", "\[Rule]",
RowBox[{"-", "1"}]}]}], "}"}], ",",
RowBox[{"{",
RowBox[{
RowBox[{"x", "\[Rule]", "0"}], ",",
RowBox[{"y", "\[Rule]",
RowBox[{"-", "1"}]}], ",",
RowBox[{"z", "\[Rule]", "1"}]}], "}"}], ",",
RowBox[{"{",
RowBox[{
RowBox[{"x", "\[Rule]", "0"}], ",",
RowBox[{"y", "\[Rule]", "1"}], ",",
RowBox[{"z", "\[Rule]",
RowBox[{"-", "1"}]}]}], "}"}], ",",
RowBox[{"{",
RowBox[{
RowBox[{"x", "\[Rule]", "0"}], ",",
RowBox[{"y", "\[Rule]", "1"}], ",",
RowBox[{"z", "\[Rule]", "1"}]}], "}"}], ",",
RowBox[{"{",
RowBox[{
RowBox[{"x", "\[Rule]", "1"}], ",",
RowBox[{"y", "\[Rule]",
RowBox[{"-", "1"}]}], ",",
RowBox[{"z", "\[Rule]", "0"}]}], "}"}], ",",
RowBox[{"{",
RowBox[{
RowBox[{"x", "\[Rule]", "1"}], ",",
RowBox[{"y", "\[Rule]", "0"}], ",",
RowBox[{"z", "\[Rule]",
RowBox[{"-", "1"}]}]}], "}"}], ",",
RowBox[{"{",
RowBox[{
RowBox[{"x", "\[Rule]", "1"}], ",",
RowBox[{"y", "\[Rule]", "0"}], ",",
RowBox[{"z", "\[Rule]", "1"}]}], "}"}], ",",
RowBox[{"{",
RowBox[{
RowBox[{"x", "\[Rule]", "1"}], ",",
RowBox[{"y", "\[Rule]", "1"}], ",",
RowBox[{"z", "\[Rule]", "0"}]}], "}"}]}], "}"}]], "Output",
CellChangeTimes->{{3.743235305220907*^9,
3.743235309139554*^9}},ExpressionUUID->"d9825c95-24bb-442a-8734-\
4c0f47e99dfc"]
}, Open ]],
Cell[BoxData[
RowBox[{
RowBox[{"xmlElem", "[", "x_", "]"}], ":=",
RowBox[{"Print", "[",
RowBox[{"\"\<<elem>\>\"", "<>",
RowBox[{"ToString", "[",
RowBox[{"x", "[",
RowBox[{"[", "1", "]"}], "]"}], "]"}], "<>", "\"\< \>\"", "<>",
RowBox[{"ToString", "[",
RowBox[{"x", "[",
RowBox[{"[", "2", "]"}], "]"}], "]"}], "<>", "\"\< \>\"", "<>",
RowBox[{"ToString", "[",
RowBox[{"x", "[",
RowBox[{"[", "3", "]"}], "]"}], "]"}], "<>", "\"\<</elem>\>\""}],
"]"}]}]], "Input",
CellChangeTimes->{{3.74323534002862*^9, 3.743235351000985*^9}, {
3.743235403233039*^9, 3.743235413488028*^9}, {3.743235473169856*^9,
3.7432354747126904`*^9}},ExpressionUUID->"aea76313-c89e-45e8-b429-\
3f454091666d"],
Cell[CellGroupData[{
Cell[BoxData[
RowBox[{
RowBox[{
RowBox[{"xmlElem", "[",
RowBox[{
RowBox[{"{",
RowBox[{"x", ",", "y", ",", "z"}], "}"}], "/.", "#"}], "]"}], "&"}], "/@",
"sol"}]], "Input",
CellChangeTimes->{{3.743235415820318*^9,
3.743235467025091*^9}},ExpressionUUID->"07da3998-8eab-40ba-8c0b-\
ac6b130cb4fb"],
Cell[CellGroupData[{
Cell[BoxData["\<\"<elem>-1 -1 0</elem>\"\>"], "Print",
CellChangeTimes->{
3.743235476581676*^9},ExpressionUUID->"c577ba06-b67a-405a-9ff5-\
2bf7dc898d03"],
Cell[BoxData["\<\"<elem>-1 0 -1</elem>\"\>"], "Print",
CellChangeTimes->{
3.743235476588011*^9},ExpressionUUID->"d041aa36-0cea-457c-9d4b-\
1fe9be66e2ab"],
Cell[BoxData["\<\"<elem>-1 0 1</elem>\"\>"], "Print",
CellChangeTimes->{
3.743235476596887*^9},ExpressionUUID->"bf141b55-86b2-4430-a994-\
5c03d5a19441"],
Cell[BoxData["\<\"<elem>-1 1 0</elem>\"\>"], "Print",
CellChangeTimes->{
3.743235476605785*^9},ExpressionUUID->"4968a660-4ecf-4b66-9071-\
8bd798c18d21"],
Cell[BoxData["\<\"<elem>0 -1 -1</elem>\"\>"], "Print",
CellChangeTimes->{
3.743235476613523*^9},ExpressionUUID->"4e22d943-2680-416b-a1d7-\
a16ca20b781f"],
Cell[BoxData["\<\"<elem>0 -1 1</elem>\"\>"], "Print",
CellChangeTimes->{
3.7432354766218576`*^9},ExpressionUUID->"6dd38385-08b3-4dd9-932f-\
98a00c6db1b2"],
Cell[BoxData["\<\"<elem>0 1 -1</elem>\"\>"], "Print",
CellChangeTimes->{
3.743235476629427*^9},ExpressionUUID->"ef3baad3-91d1-4735-9a22-\
53495a624c15"],
Cell[BoxData["\<\"<elem>0 1 1</elem>\"\>"], "Print",
CellChangeTimes->{
3.743235476638257*^9},ExpressionUUID->"413fbb68-5017-4272-a62a-\
fa234e6daaea"],
Cell[BoxData["\<\"<elem>1 -1 0</elem>\"\>"], "Print",
CellChangeTimes->{
3.743235476646203*^9},ExpressionUUID->"3a832a60-ae00-414b-a9ac-\
f5e86e67e917"],
Cell[BoxData["\<\"<elem>1 0 -1</elem>\"\>"], "Print",
CellChangeTimes->{
3.743235476653907*^9},ExpressionUUID->"bfc79ef6-f6c7-4f1e-88e8-\
005ac314be9c"],
Cell[BoxData["\<\"<elem>1 0 1</elem>\"\>"], "Print",
CellChangeTimes->{
3.743235476662575*^9},ExpressionUUID->"0f892891-f885-489c-9925-\
ddef4d698410"],
Cell[BoxData["\<\"<elem>1 1 0</elem>\"\>"], "Print",
CellChangeTimes->{
3.7432354766702337`*^9},ExpressionUUID->"2906f190-e673-4f33-9c34-\
e8e56efe7a27"]
}, Open ]],
Cell[BoxData[
RowBox[{"{",
RowBox[{
"Null", ",", "Null", ",", "Null", ",", "Null", ",", "Null", ",", "Null",
",", "Null", ",", "Null", ",", "Null", ",", "Null", ",", "Null", ",",
"Null"}], "}"}]], "Output",
CellChangeTimes->{
3.7432354246225967`*^9, {3.7432354674878073`*^9,
3.743235476678007*^9}},ExpressionUUID->"500ca3c1-88d8-46e5-a1a1-\
86a7878e5638"]
}, Open ]],
Cell[CellGroupData[{
Cell["Clifford algebra generation", "Section",
CellChangeTimes->{{3.6942089434583883`*^9,
3.694208978559093*^9}},ExpressionUUID->"a5b064b3-3011-4922-8559-\
@ -1048,9 +820,10 @@ generated by the Mathematica notebook gamma-gen/gamma-gen.nb\n\n#include \
"\"\< static const std::array<const char *, nGamma> \
name;\n static const std::array<std::array<Algebra, nGamma>, nGamma> mul;\n\
static const std::array<Algebra, nGamma> adj;\n \
static const std::array<const Gamma, 4> gmu;\n \
Algebra g;\n public:\n \
Gamma(Algebra initg): g(initg) {} \n};\n\n\>\""}]}], ";",
static const std::array<const Gamma, 4> gmu;\n static \
const std::array<const Gamma, 16> gall;\n Algebra \
g;\n public:\n \
Gamma(Algebra initg): g(initg) {} \n};\n\n\>\""}]}], ";",
"\[IndentingNewLine]",
RowBox[{"out", " ", "=",
RowBox[{"out", "<>", "funcCode"}]}], ";", "\[IndentingNewLine]",
@ -1076,7 +849,8 @@ Algebra g;\n public:\n \
3.694963343265525*^9}, {3.694964367519239*^9, 3.69496439461199*^9}, {
3.694964462130747*^9, 3.6949644669959793`*^9}, 3.694964509762739*^9, {
3.694964705045744*^9, 3.694964723148797*^9}, {3.694964992988984*^9,
3.6949649968504257`*^9}},ExpressionUUID->"c7103bd6-b539-4495-b98c-\
3.6949649968504257`*^9}, {3.758291687176977*^9,
3.758291694181189*^9}},ExpressionUUID->"c7103bd6-b539-4495-b98c-\
d4d12ac6cad8"],
Cell["Gamma enum generation:", "Text",
@ -1745,8 +1519,17 @@ namespace QCD {\>\""}]}], ";", "\[IndentingNewLine]",
"\"\<\n\nconst std::array<const Gamma, 4> Gamma::gmu = {{\n \
Gamma(Gamma::Algebra::GammaX),\n Gamma(Gamma::Algebra::GammaY),\n \
Gamma(Gamma::Algebra::GammaZ),\n Gamma(Gamma::Algebra::GammaT)}};\n\nconst \
std::array<const char *, Gamma::nGamma> Gamma::name = {{\n\>\""}]}], ";",
"\[IndentingNewLine]",
std::array<const Gamma, 16> Gamma::gall = {{\n \
Gamma(Gamma::Algebra::Identity),\n Gamma(Gamma::Algebra::Gamma5),\n \
Gamma(Gamma::Algebra::GammaX),\n Gamma(Gamma::Algebra::GammaY),\n \
Gamma(Gamma::Algebra::GammaZ),\n Gamma(Gamma::Algebra::GammaT),\n \
Gamma(Gamma::Algebra::GammaXGamma5),\n Gamma(Gamma::Algebra::GammaYGamma5),\n\
Gamma(Gamma::Algebra::GammaZGamma5),\n \
Gamma(Gamma::Algebra::GammaTGamma5),\n Gamma(Gamma::Algebra::SigmaXT), \
\n Gamma(Gamma::Algebra::SigmaXY), \n Gamma(Gamma::Algebra::SigmaXZ), \
\n Gamma(Gamma::Algebra::SigmaYT),\n Gamma(Gamma::Algebra::SigmaYZ),\n \
Gamma(Gamma::Algebra::SigmaZT)}};\n\nconst std::array<const char *, \
Gamma::nGamma> Gamma::name = {{\n\>\""}]}], ";", "\[IndentingNewLine]",
RowBox[{"Do", "[", "\[IndentingNewLine]",
RowBox[{
RowBox[{"out", " ", "=", " ",
@ -1847,7 +1630,9 @@ Gamma::nGamma> Gamma::mul = {{\\n\>\""}]}], ";", "\[IndentingNewLine]",
3.694963031525289*^9}, {3.694963065828494*^9, 3.694963098327538*^9}, {
3.6949632020836153`*^9, 3.6949632715940027`*^9}, {3.694963440035037*^9,
3.6949634418966017`*^9}, {3.6949651447067547`*^9, 3.694965161228381*^9}, {
3.694967957845581*^9, 3.694967958364184*^9}}],
3.694967957845581*^9, 3.694967958364184*^9}, {3.758291673792514*^9,
3.758291676983432*^9}},ExpressionUUID->"b1b309f8-a3a7-4081-a781-\
c3845e3cd372"],
Cell[BoxData[
RowBox[{
@ -1867,8 +1652,8 @@ Cell[BoxData[""], "Input",
},
WindowSize->{1246, 1005},
WindowMargins->{{282, Automatic}, {Automatic, 14}},
FrontEndVersion->"11.2 for Mac OS X x86 (32-bit, 64-bit Kernel) (September \
10, 2017)",
FrontEndVersion->"11.3 for Mac OS X x86 (32-bit, 64-bit Kernel) (March 5, \
2018)",
StyleDefinitions->"Default.nb"
]
(* End of Notebook Content *)
@ -1888,75 +1673,48 @@ Cell[1948, 43, 570, 11, 73, "Input",ExpressionUUID->"5c937a3e-adfd-4d7e-8fde-afb
Cell[2521, 56, 1172, 17, 34, "Output",ExpressionUUID->"72817ba6-2f6a-4a4d-8212-6f0970f49e7c"]
}, Open ]],
Cell[CellGroupData[{
Cell[3730, 78, 248, 5, 30, "Input",ExpressionUUID->"d1a0fd03-85e1-43af-ba80-3ca4235675d8"],
Cell[3981, 85, 299, 9, 34, "Output",ExpressionUUID->"16d3f953-4b24-4ed2-ae62-306dcab66ca7"]
Cell[3730, 78, 174, 3, 67, "Section",ExpressionUUID->"a5b064b3-3011-4922-8559-ead857cad102"],
Cell[3907, 83, 535, 16, 52, "Input",ExpressionUUID->"aa28f02b-31e1-4df2-9b5d-482177464b59"],
Cell[4445, 101, 250, 4, 35, "Text",ExpressionUUID->"c8896b88-f1db-4ce4-b7a6-0c9838bdb8f1"],
Cell[4698, 107, 5511, 169, 425, "Input",ExpressionUUID->"52a96ff6-047e-4043-86d0-e303866e5f8e"],
Cell[CellGroupData[{
Cell[10234, 280, 2183, 58, 135, "Input",ExpressionUUID->"8b0f4955-2c3f-418c-9226-9be8f87621e8"],
Cell[12420, 340, 1027, 27, 56, "Output",ExpressionUUID->"edd0619f-6f12-4070-a1d2-6b547877fadc"]
}, Open ]],
Cell[CellGroupData[{
Cell[4317, 99, 469, 14, 33, "Input",ExpressionUUID->"f0fa2a5c-3d81-4d75-a447-50c7ca3459ff"],
Cell[4789, 115, 2423, 77, 56, "Output",ExpressionUUID->"d9825c95-24bb-442a-8734-4c0f47e99dfc"]
Cell[13484, 372, 1543, 46, 114, "Input",ExpressionUUID->"fb45123c-c610-4075-99b0-7cd71c728ae7"],
Cell[15030, 420, 1311, 32, 87, "Output",ExpressionUUID->"2ae14565-b412-4dc0-9dce-bd6c1ba5ef27"]
}, Open ]],
Cell[7227, 195, 751, 18, 30, "Input",ExpressionUUID->"aea76313-c89e-45e8-b429-3f454091666d"],
Cell[16356, 455, 179, 3, 35, "Text",ExpressionUUID->"af247231-a58d-417b-987a-26908dafffdb"],
Cell[16538, 460, 2175, 65, 94, "Input",ExpressionUUID->"7c44cadd-e488-4f51-87d8-c64eef11f40c"],
Cell[18716, 527, 193, 3, 35, "Text",ExpressionUUID->"856f1746-1107-4509-a5ce-ac9c7f56cdb1"],
Cell[CellGroupData[{
Cell[8003, 217, 323, 10, 30, "Input",ExpressionUUID->"07da3998-8eab-40ba-8c0b-ac6b130cb4fb"],
Cell[CellGroupData[{
Cell[8351, 231, 156, 3, 24, "Print",ExpressionUUID->"c577ba06-b67a-405a-9ff5-2bf7dc898d03"],
Cell[8510, 236, 156, 3, 24, "Print",ExpressionUUID->"d041aa36-0cea-457c-9d4b-1fe9be66e2ab"],
Cell[8669, 241, 155, 3, 24, "Print",ExpressionUUID->"bf141b55-86b2-4430-a994-5c03d5a19441"],
Cell[8827, 246, 155, 3, 24, "Print",ExpressionUUID->"4968a660-4ecf-4b66-9071-8bd798c18d21"],
Cell[8985, 251, 156, 3, 24, "Print",ExpressionUUID->"4e22d943-2680-416b-a1d7-a16ca20b781f"],
Cell[9144, 256, 157, 3, 24, "Print",ExpressionUUID->"6dd38385-08b3-4dd9-932f-98a00c6db1b2"],
Cell[9304, 261, 155, 3, 24, "Print",ExpressionUUID->"ef3baad3-91d1-4735-9a22-53495a624c15"],
Cell[9462, 266, 154, 3, 24, "Print",ExpressionUUID->"413fbb68-5017-4272-a62a-fa234e6daaea"],
Cell[9619, 271, 155, 3, 24, "Print",ExpressionUUID->"3a832a60-ae00-414b-a9ac-f5e86e67e917"],
Cell[9777, 276, 155, 3, 24, "Print",ExpressionUUID->"bfc79ef6-f6c7-4f1e-88e8-005ac314be9c"],
Cell[9935, 281, 154, 3, 24, "Print",ExpressionUUID->"0f892891-f885-489c-9925-ddef4d698410"],
Cell[10092, 286, 156, 3, 24, "Print",ExpressionUUID->"2906f190-e673-4f33-9c34-e8e56efe7a27"]
}, Open ]],
Cell[10263, 292, 376, 9, 34, "Output",ExpressionUUID->"500ca3c1-88d8-46e5-a1a1-86a7878e5638"]
Cell[18934, 534, 536, 16, 30, "Input",ExpressionUUID->"8674484a-8543-434f-b177-3b27f9353212"],
Cell[19473, 552, 1705, 35, 87, "Output",ExpressionUUID->"c3b3f84d-91f6-41af-af6b-a394ca020511"]
}, Open ]],
Cell[21193, 590, 170, 3, 35, "Text",ExpressionUUID->"518a3040-54b1-4d43-8947-5c7d12efa94d"],
Cell[CellGroupData[{
Cell[10676, 306, 174, 3, 67, "Section",ExpressionUUID->"a5b064b3-3011-4922-8559-ead857cad102"],
Cell[10853, 311, 535, 16, 52, "Input",ExpressionUUID->"aa28f02b-31e1-4df2-9b5d-482177464b59"],
Cell[11391, 329, 250, 4, 35, "Text",ExpressionUUID->"c8896b88-f1db-4ce4-b7a6-0c9838bdb8f1"],
Cell[11644, 335, 5511, 169, 425, "Input",ExpressionUUID->"52a96ff6-047e-4043-86d0-e303866e5f8e"],
Cell[CellGroupData[{
Cell[17180, 508, 2183, 58, 135, "Input",ExpressionUUID->"8b0f4955-2c3f-418c-9226-9be8f87621e8"],
Cell[19366, 568, 1027, 27, 67, "Output",ExpressionUUID->"edd0619f-6f12-4070-a1d2-6b547877fadc"]
}, Open ]],
Cell[CellGroupData[{
Cell[20430, 600, 1543, 46, 114, "Input",ExpressionUUID->"fb45123c-c610-4075-99b0-7cd71c728ae7"],
Cell[21976, 648, 1311, 32, 98, "Output",ExpressionUUID->"2ae14565-b412-4dc0-9dce-bd6c1ba5ef27"]
}, Open ]],
Cell[23302, 683, 179, 3, 35, "Text",ExpressionUUID->"af247231-a58d-417b-987a-26908dafffdb"],
Cell[23484, 688, 2175, 65, 94, "Input",ExpressionUUID->"7c44cadd-e488-4f51-87d8-c64eef11f40c"],
Cell[25662, 755, 193, 3, 35, "Text",ExpressionUUID->"856f1746-1107-4509-a5ce-ac9c7f56cdb1"],
Cell[CellGroupData[{
Cell[25880, 762, 536, 16, 30, "Input",ExpressionUUID->"8674484a-8543-434f-b177-3b27f9353212"],
Cell[26419, 780, 1705, 35, 87, "Output",ExpressionUUID->"c3b3f84d-91f6-41af-af6b-a394ca020511"]
}, Open ]],
Cell[28139, 818, 170, 3, 35, "Text",ExpressionUUID->"518a3040-54b1-4d43-8947-5c7d12efa94d"],
Cell[CellGroupData[{
Cell[28334, 825, 536, 14, 30, "Input",ExpressionUUID->"61a2e974-2b39-4a07-8043-2dfd39a70569"],
Cell[28873, 841, 6754, 167, 303, "Output",ExpressionUUID->"73480ac0-3043-4077-80cc-b952a94c822a"]
Cell[21388, 597, 536, 14, 30, "Input",ExpressionUUID->"61a2e974-2b39-4a07-8043-2dfd39a70569"],
Cell[21927, 613, 6754, 167, 303, "Output",ExpressionUUID->"73480ac0-3043-4077-80cc-b952a94c822a"]
}, Open ]]
}, Open ]],
Cell[CellGroupData[{
Cell[35676, 1014, 226, 4, 67, "Section",ExpressionUUID->"4e833cd6-9f0e-4aa3-a873-3d579e874720"],
Cell[35905, 1020, 188, 4, 44, "Text",ExpressionUUID->"6d27fc04-3a60-4e03-8df7-3dd3aeee35b4"],
Cell[36096, 1026, 2980, 53, 703, "Input",ExpressionUUID->"c7103bd6-b539-4495-b98c-d4d12ac6cad8"],
Cell[39079, 1081, 221, 4, 44, "Text",ExpressionUUID->"0625593d-290f-4a39-9d80-8e2c6fdbc94e"],
Cell[39303, 1087, 4936, 150, 682, "Input",ExpressionUUID->"1ad4904c-352f-4b1d-a7c7-91e1b0549409"],
Cell[44242, 1239, 2645, 56, 199, "Input",ExpressionUUID->"0221674f-9b63-4662-91bc-ccc8c6ae9589"],
Cell[46890, 1297, 209, 4, 44, "Text",ExpressionUUID->"d2d2257a-487b-416f-bc40-abd4482225f7"],
Cell[47102, 1303, 15306, 397, 2131, "Input",ExpressionUUID->"daea68a9-c9e8-46ab-9bc8-5186e2cf477c"],
Cell[62411, 1702, 137, 2, 44, "Text",ExpressionUUID->"76ba9d5a-7ee3-4888-be7e-6377003275e8"],
Cell[62551, 1706, 521, 12, 30, "Input",ExpressionUUID->"4ec61f4c-3fd3-49ea-b5ef-6f7f04a16b34"]
Cell[28730, 786, 226, 4, 67, "Section",ExpressionUUID->"4e833cd6-9f0e-4aa3-a873-3d579e874720"],
Cell[28959, 792, 188, 4, 44, "Text",ExpressionUUID->"6d27fc04-3a60-4e03-8df7-3dd3aeee35b4"],
Cell[29150, 798, 3104, 55, 724, "Input",ExpressionUUID->"c7103bd6-b539-4495-b98c-d4d12ac6cad8"],
Cell[32257, 855, 221, 4, 44, "Text",ExpressionUUID->"0625593d-290f-4a39-9d80-8e2c6fdbc94e"],
Cell[32481, 861, 4936, 150, 682, "Input",ExpressionUUID->"1ad4904c-352f-4b1d-a7c7-91e1b0549409"],
Cell[37420, 1013, 2645, 56, 199, "Input",ExpressionUUID->"0221674f-9b63-4662-91bc-ccc8c6ae9589"],
Cell[40068, 1071, 209, 4, 44, "Text",ExpressionUUID->"d2d2257a-487b-416f-bc40-abd4482225f7"],
Cell[40280, 1077, 15306, 397, 2131, "Input",ExpressionUUID->"daea68a9-c9e8-46ab-9bc8-5186e2cf477c"],
Cell[55589, 1476, 137, 2, 44, "Text",ExpressionUUID->"76ba9d5a-7ee3-4888-be7e-6377003275e8"],
Cell[55729, 1480, 521, 12, 30, "Input",ExpressionUUID->"4ec61f4c-3fd3-49ea-b5ef-6f7f04a16b34"]
}, Open ]],
Cell[CellGroupData[{
Cell[63109, 1723, 167, 2, 67, "Section",ExpressionUUID->"a4458b3a-09b5-4e36-a1fc-781d6702b2dc"],
Cell[63279, 1727, 5693, 122, 829, "Input",ExpressionUUID->"b1b309f8-a3a7-4081-a781-c3845e3cd372"],
Cell[68975, 1851, 448, 10, 30, "Input",ExpressionUUID->"cba42949-b0f2-42ce-aebd-ffadfd83ef88"],
Cell[69426, 1863, 94, 1, 30, "Input",ExpressionUUID->"6175b72c-af9f-43c2-b4ca-bd84c48a456d"]
Cell[56287, 1497, 167, 2, 67, "Section",ExpressionUUID->"a4458b3a-09b5-4e36-a1fc-781d6702b2dc"],
Cell[56457, 1501, 6464, 133, 1207, "Input",ExpressionUUID->"b1b309f8-a3a7-4081-a781-c3845e3cd372"],
Cell[62924, 1636, 448, 10, 30, "Input",ExpressionUUID->"cba42949-b0f2-42ce-aebd-ffadfd83ef88"],
Cell[63375, 1648, 94, 1, 30, "Input",ExpressionUUID->"6175b72c-af9f-43c2-b4ca-bd84c48a456d"]
}, Open ]]
}
]

View File

@ -27,12 +27,13 @@ public:
typedef iSpinColourMatrix<vector_type> SpinColourMatrix_v;
static void MesonField(Eigen::Tensor<ComplexD,5> &mat,
template <typename TensorType> // output: rank 5 tensor, e.g. Eigen::Tensor<ComplexD, 5>
static void MesonField(TensorType &mat,
const FermionField *lhs_wi,
const FermionField *rhs_vj,
std::vector<Gamma::Algebra> gammas,
const std::vector<ComplexField > &mom,
int orthogdim);
int orthogdim, double *t_kernel = nullptr, double *t_gsum = nullptr);
static void PionFieldWVmom(Eigen::Tensor<ComplexD,4> &mat,
const FermionField *wi,
@ -59,6 +60,14 @@ public:
const FermionField *vj,
int orthogdim);
template <typename TensorType> // output: rank 5 tensor, e.g. Eigen::Tensor<ComplexD, 5>
static void AslashField(TensorType &mat,
const FermionField *lhs_wi,
const FermionField *rhs_vj,
const std::vector<ComplexField> &emB0,
const std::vector<ComplexField> &emB1,
int orthogdim, double *t_kernel = nullptr, double *t_gsum = nullptr);
static void ContractWWVV(std::vector<PropagatorField> &WWVV,
const Eigen::Tensor<ComplexD,3> &WW_sd,
const FermionField *vs,
@ -92,13 +101,14 @@ public:
#endif
};
template<class FImpl>
void A2Autils<FImpl>::MesonField(Eigen::Tensor<ComplexD,5> &mat,
template <class FImpl>
template <typename TensorType>
void A2Autils<FImpl>::MesonField(TensorType &mat,
const FermionField *lhs_wi,
const FermionField *rhs_vj,
std::vector<Gamma::Algebra> gammas,
const std::vector<ComplexField > &mom,
int orthogdim)
int orthogdim, double *t_kernel, double *t_gsum)
{
typedef typename FImpl::SiteSpinor vobj;
@ -146,6 +156,7 @@ void A2Autils<FImpl>::MesonField(Eigen::Tensor<ComplexD,5> &mat,
int stride=grid->_slice_stride[orthogdim];
// potentially wasting cores here if local time extent too small
if (t_kernel) *t_kernel = -usecond();
parallel_for(int r=0;r<rd;r++){
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
@ -212,7 +223,7 @@ void A2Autils<FImpl>::MesonField(Eigen::Tensor<ComplexD,5> &mat,
}
}}}
}
if (t_kernel) *t_kernel += usecond();
assert(mat.dimension(0) == Nmom);
assert(mat.dimension(1) == Ngamma);
assert(mat.dimension(2) == Nt);
@ -256,9 +267,9 @@ void A2Autils<FImpl>::MesonField(Eigen::Tensor<ComplexD,5> &mat,
// Vector size is 7 x 16 x 32 x 16 x 16 x sizeof(complex) = 2MB - 60MB depending on volume
// Healthy size that should suffice
////////////////////////////////////////////////////////////////////
if (t_gsum) *t_gsum = -usecond();
grid->GlobalSumVector(&mat(0,0,0,0,0),Nmom*Ngamma*Nt*Lblock*Rblock);
if (t_gsum) *t_gsum += usecond();
}
@ -614,6 +625,189 @@ void A2Autils<FImpl>::PionFieldVV(Eigen::Tensor<ComplexD,3> &mat,
PionFieldXX(mat,vi,vj,orthogdim,nog5);
}
// "A-slash" field w_i(x)^dag * i * A_mu * gamma_mu * v_j(x)
//
// With:
//
// B_0 = A_0 + i A_1
// B_1 = A_2 + i A_3
//
// then in spin space
//
// ( 0 0 -conj(B_1) -B_0 )
// i * A_mu g_mu = ( 0 0 -conj(B_0) B_1 )
// ( B_1 B_0 0 0 )
// ( conj(B_0) -conj(B_1) 0 0 )
// Computes, for every EM field index m, every global slice t along orthogdim
// and every mode pair (i,j), the slice sum of w_i^dag * (i A-slash) * v_j and
// stores it in mat(m,0,t,i,j); the result is then summed over all nodes.
//
//  mat       : output tensor. Lblock/Rblock are read from dimensions 3 and 4;
//              only index 0 of dimension 1 is written.
//              NOTE(review): the final GlobalSumVector count assumes
//              dimension(0)==Nem, dimension(1)==1 and dimension(2)==Nt with
//              contiguous storage; none of this is asserted here - confirm
//              against callers.
//  lhs_wi    : array of at least Lblock left-hand fields (conjugated below)
//  rhs_vj    : array of at least Rblock right-hand fields
//  emB0/emB1 : the combinations B_0, B_1 of the EM field, one pair per index m;
//              both vectors must have the same length
//  orthogdim : direction that is NOT summed over (the "t" index of mat)
//  t_kernel  : if non-null, receives the usecond() time spent in the local
//              contraction plus the SIMD-lane reduction
//  t_gsum    : if non-null, receives the usecond() time spent in the global sum
template <class FImpl>
template <typename TensorType>
void A2Autils<FImpl>::AslashField(TensorType &mat,
const FermionField *lhs_wi,
const FermionField *rhs_vj,
const std::vector<ComplexField> &emB0,
const std::vector<ComplexField> &emB1,
int orthogdim, double *t_kernel, double *t_gsum)
{
typedef typename FermionField::vector_object vobj;
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
typedef iSpinMatrix<vector_type> SpinMatrix_v;
typedef iSpinMatrix<scalar_type> SpinMatrix_s;
typedef iSinglet<vector_type> Singlet_v;
typedef iSinglet<scalar_type> Singlet_s;
// Block sizes come from the output tensor, not from the input arrays
int Lblock = mat.dimension(3);
int Rblock = mat.dimension(4);
GridBase *grid = lhs_wi[0]._grid;
const int Nd = grid->_ndimension;
const int Nsimd = grid->Nsimd();
int Nt = grid->GlobalDimensions()[orthogdim];
int Nem = emB0.size();
assert(emB1.size() == Nem);
// Extents along orthogdim as stored by the grid (_fdimensions, _ldimensions,
// _rdimensions); fd is not used below
int fd=grid->_fdimensions[orthogdim];
int ld=grid->_ldimensions[orthogdim];
int rd=grid->_rdimensions[orthogdim];
// will locally sum vectors first
// sum across these down to scalars
// splitting the SIMD
int MFrvol = rd*Lblock*Rblock*Nem;
int MFlvol = ld*Lblock*Rblock*Nem;
// lvSum: SIMD-vector accumulators, indexed m + Nem*(i + Lblock*(j + Rblock*r))
Vector<vector_type> lvSum(MFrvol);
parallel_for (int r = 0; r < MFrvol; r++)
{
lvSum[r] = zero;
}
// lsSum: scalar accumulators with the same layout but the local index lt
// (0..ld-1) in place of r
Vector<scalar_type> lsSum(MFlvol);
parallel_for (int r = 0; r < MFlvol; r++)
{
lsSum[r] = scalar_type(0.0);
}
int e1= grid->_slice_nblock[orthogdim];
int e2= grid->_slice_block [orthogdim];
int stride=grid->_slice_stride[orthogdim];
// Nested parallelism would be ok
// Wasting cores here if rd is small: threading is over r only
if (t_kernel) *t_kernel = -usecond();
parallel_for(int r=0;r<rd;r++)
{
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
for(int n=0;n<e1;n++)
for(int b=0;b<e2;b++)
{
int ss= so+n*stride+b;
for(int i=0;i<Lblock;i++)
{
auto left = conjugate(lhs_wi[i]._odata[ss]);
for(int j=0;j<Rblock;j++)
{
SpinMatrix_v vv;
auto right = rhs_vj[j]._odata[ss];
// Colour trace written out for three colours:
// vv(s1,s2) = sum_c conj(w_i)(s2,c) * v_j(s1,c)
for(int s1=0;s1<Ns;s1++)
for(int s2=0;s2<Ns;s2++)
{
vv()(s1,s2)() = left()(s2)(0) * right()(s1)(0)
+ left()(s2)(1) * right()(s1)(1)
+ left()(s2)(2) * right()(s1)(2);
}
// After getting the sitewise spin product, loop over the EM fields
int base = Nem*i+Nem*Lblock*j+Nem*Lblock*Rblock*r;
for ( int m=0;m<Nem;m++)
{
int idx = m+base;
auto b0 = emB0[m]._odata[ss];
auto b1 = emB1[m]._odata[ss];
auto cb0 = conjugate(b0);
auto cb1 = conjugate(b1);
// Spin contraction with the i*A_mu*g_mu matrix: the coefficient of
// vv(s1,s2) is the matrix element (row s2, column s1)
lvSum[idx] += - vv()(3,0)()*b0()()() - vv()(2,0)()*cb1()()()
+ vv()(3,1)()*b1()()() - vv()(2,1)()*cb0()()()
+ vv()(0,2)()*b1()()() + vv()(1,2)()*b0()()()
+ vv()(0,3)()*cb0()()() - vv()(1,3)()*cb1()()();
}
}
}
}
}
// Sum across simd lanes in the plane, breaking out orthog dir.
parallel_for(int rt=0;rt<rd;rt++)
{
std::vector<int> icoor(Nd);
std::vector<scalar_type> extracted(Nsimd);
for(int i=0;i<Lblock;i++)
for(int j=0;j<Rblock;j++)
for(int m=0;m<Nem;m++)
{
int ij_rdx = m+Nem*i+Nem*Lblock*j+Nem*Lblock*Rblock*rt;
extract<vector_type,scalar_type>(lvSum[ij_rdx],extracted);
for(int idx=0;idx<Nsimd;idx++)
{
// Each SIMD lane contributes to the local slice rt + (lane coordinate)*rd
grid->iCoorFromIindex(icoor,idx);
int ldx = rt+icoor[orthogdim]*rd;
int ij_ldx = m+Nem*i+Nem*Lblock*j+Nem*Lblock*Rblock*ldx;
lsSum[ij_ldx]=lsSum[ij_ldx]+extracted[idx];
}
}
}
if (t_kernel) *t_kernel += usecond();
// ld loop and local only??
// Scatter the local slices into the global t range of mat: entries whose
// processor coordinate pt is this node's (pc) get the local sums, all other
// entries are zeroed so that the global sum below assembles the full extent.
int pd = grid->_processors[orthogdim];
int pc = grid->_processor_coor[orthogdim];
parallel_for_nest2(int lt=0;lt<ld;lt++)
{
for(int pt=0;pt<pd;pt++)
{
int t = lt + pt*ld;
if (pt == pc)
{
for(int i=0;i<Lblock;i++)
for(int j=0;j<Rblock;j++)
for(int m=0;m<Nem;m++)
{
int ij_dx = m+Nem*i + Nem*Lblock * j + Nem*Lblock * Rblock * lt;
mat(m,0,t,i,j) = lsSum[ij_dx];
}
}
else
{
const scalar_type zz(0.0);
for(int i=0;i<Lblock;i++)
for(int j=0;j<Rblock;j++)
for(int m=0;m<Nem;m++)
{
mat(m,0,t,i,j) = zz;
}
}
}
}
// Sum the Nem*Nt*Lblock*Rblock entries starting at mat(0,0,0,0,0) over nodes
if (t_gsum) *t_gsum = -usecond();
grid->GlobalSumVector(&mat(0,0,0,0,0),Nem*Nt*Lblock*Rblock);
if (t_gsum) *t_gsum += usecond();
}
////////////////////////////////////////////
// Schematic thoughts about more generalised four quark insertion
@ -792,17 +986,18 @@ void A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
for(int t=0;t<N_t;t++){
for(int s=0;s<N_s;s++){
auto tmp1 = vs[s]._odata[ss];
vobj tmp2 = zero;
vobj tmp2 = zero;
vobj tmp3 = zero;
for(int d=d_o;d<MIN(d_o+d_unroll,N_d);d++){
Scalar_v coeff = WW_sd(t,s,d);
mac(&tmp2 ,& coeff, & vd[d]._odata[ss]);
}
tmp3 = conjugate(vd[d]._odata[ss]);
mac(&tmp2, &coeff, &tmp3);
}
//////////////////////////
// Fast outer product of tmp1 with a sum of terms suppressed by d_unroll
//////////////////////////
tmp2 = conjugate(tmp2);
for(int s1=0;s1<Ns;s1++){
for(int s2=0;s2<Ns;s2++){
WWVV[t]._odata[ss]()(s1,s2)(0,0) += tmp1()(s1)(0)*tmp2()(s2)(0);

View File

@ -0,0 +1,87 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/scalar/CovariantLaplacian.h
Copyright (C) 2016
Author: Azusa Yamaguchi
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
#pragma once
namespace Grid {
namespace QCD {

// Iterative gauge-covariant Gaussian smearing of a lattice field.
template <class Gimpl> class CovariantSmearing : public Gimpl
{
public:
  INHERIT_GIMPL_TYPES(Gimpl);

  typedef typename Gimpl::GaugeLinkField GaugeMat;
  typedef typename Gimpl::GaugeField GaugeLorentz;

  // Smears chi in place by applying
  //
  //    chi <- chi + (width^2 / (4*Iterations)) * Laplacian(chi)
  //
  // Iterations times, where the covariant Laplacian is built from forward
  // and backward covariant shifts with the links U[mu].
  //
  //  U          : gauge links, one entry per direction (indexed 0..Nd-1)
  //  chi        : field to smear; overwritten with the result
  //  width      : smearing width (Chroma convention, see below)
  //  Iterations : number of smearing steps; must be non-zero
  //  orthog     : direction excluded from the Laplacian. Any value outside
  //               [0,Nd) (e.g. -1) excludes nothing, so all Nd directions
  //               are smeared.
  template<typename T>
  static void GaussianSmear(const std::vector<LatticeColourMatrix>& U,
                            T& chi,
                            const Real& width, int Iterations, int orthog)
  {
    GridBase *grid = chi._grid;
    T psi(grid);

    ////////////////////////////////////////////////////////////////////////////////////
    // Follow Chroma conventions for width to keep compatibility with previous data
    // Free field iterates
    //   chi = (1 - w^2/4N p^2)^N chi
    //
    //       ~ (e^(-w^2/4N p^2))^N chi
    //       ~  e^(-w^2/4 p^2) chi
    //       ~  e^(-w'^2/2 p^2) chi          [ w' = w/sqrt(2) ]
    //
    // Which in coordinate space is proportional to
    //
    //   e^(-x^2/w^2) = e^(-x^2/2w'^2)
    //
    // The 4 is a bit unconventional from Gaussian width perspective, but... it's Chroma convention.
    // 2nd derivative approx d^2/dx^2  =  x+mu + x-mu - 2x
    //
    // d^2/dx^2 = - p^2
    //
    // chi = ( 1 + w^2/4N d^2/dx^2 )^N chi
    //
    ////////////////////////////////////////////////////////////////////////////////////
    Real coeff = (width*width) / Real(4*Iterations);

    // Number of directions that enter the Laplacian. This must match the
    // directions visited in the mu loop below: a direction is only skipped
    // when orthog names a real one, so a negative orthog must not reduce
    // the count (otherwise the -2*dims*chi term is too small by 2*chi).
    int dims = Nd;
    if( (orthog >= 0) && (orthog < Nd) ) dims=Nd-1;

    for(int n = 0; n < Iterations; ++n) {
      psi = (-2.0*dims)*chi;
      for(int mu=0;mu<Nd;mu++) {
        if ( mu != orthog ) {
          psi = psi + Gimpl::CovShiftForward(U[mu],mu,chi);
          psi = psi + Gimpl::CovShiftBackward(U[mu],mu,chi);
        }
      }
      chi = chi + coeff*psi;
    }
  }
};
}}

View File

@ -31,6 +31,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
namespace Grid {
namespace QCD {
template <class Gimpl>
class FourierAcceleratedGaugeFixer : public Gimpl {
public:
@ -45,30 +46,58 @@ class FourierAcceleratedGaugeFixer : public Gimpl {
A[mu] = Ta(U[mu]) * cmi;
}
}
static void DmuAmu(const std::vector<GaugeMat> &A,GaugeMat &dmuAmu) {
static void DmuAmu(const std::vector<GaugeMat> &A,GaugeMat &dmuAmu,int orthog) {
dmuAmu=zero;
for(int mu=0;mu<Nd;mu++){
dmuAmu = dmuAmu + A[mu] - Cshift(A[mu],mu,-1);
if ( mu != orthog ) {
dmuAmu = dmuAmu + A[mu] - Cshift(A[mu],mu,-1);
}
}
}
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false) {
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1) {
GridBase *grid = Umu._grid;
GaugeMat xform(grid);
SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier,orthog);
}
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,GaugeMat &xform,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1) {
GridBase *grid = Umu._grid;
Real org_plaq =WilsonLoops<Gimpl>::avgPlaquette(Umu);
Real org_link_trace=WilsonLoops<Gimpl>::linkTrace(Umu);
Real old_trace = org_link_trace;
Real trG;
xform=1.0;
std::vector<GaugeMat> U(Nd,grid);
GaugeMat dmuAmu(grid);
for(int i=0;i<maxiter;i++){
for(int mu=0;mu<Nd;mu++) U[mu]= PeekIndex<LorentzIndex>(Umu,mu);
if ( Fourier==false ) {
trG = SteepestDescentStep(U,alpha,dmuAmu);
GaugeMat dmuAmu(grid);
{
Real plaq =WilsonLoops<Gimpl>::avgPlaquette(Umu);
Real link_trace=WilsonLoops<Gimpl>::linkTrace(Umu);
if( (orthog>=0) && (orthog<Nd) ){
std::cout << GridLogMessage << " Gauge fixing to Coulomb gauge time="<<orthog<< " plaq= "<<plaq<<" link trace = "<<link_trace<< std::endl;
} else {
trG = FourierAccelSteepestDescentStep(U,alpha,dmuAmu);
std::cout << GridLogMessage << " Gauge fixing to Landau gauge plaq= "<<plaq<<" link trace = "<<link_trace<< std::endl;
}
}
for(int i=0;i<maxiter;i++){
for(int mu=0;mu<Nd;mu++) U[mu]= PeekIndex<LorentzIndex>(Umu,mu);
if ( Fourier==false ) {
trG = SteepestDescentStep(U,xform,alpha,dmuAmu,orthog);
} else {
trG = FourierAccelSteepestDescentStep(U,xform,alpha,dmuAmu,orthog);
}
// std::cout << GridLogMessage << "trG "<< trG<< std::endl;
// std::cout << GridLogMessage << "xform "<< norm2(xform)<< std::endl;
// std::cout << GridLogMessage << "dmuAmu "<< norm2(dmuAmu)<< std::endl;
for(int mu=0;mu<Nd;mu++) PokeIndex<LorentzIndex>(Umu,U[mu],mu);
// Monitor progress and convergence test
// infrequently to minimise cost overhead
@ -84,7 +113,6 @@ class FourierAcceleratedGaugeFixer : public Gimpl {
Real Phi = 1.0 - old_trace / link_trace ;
Real Omega= 1.0 - trG;
std::cout << GridLogMessage << " Iteration "<<i<< " Phi= "<<Phi<< " Omega= " << Omega<< " trG " << trG <<std::endl;
if ( (Omega < Omega_tol) && ( ::fabs(Phi) < Phi_tol) ) {
std::cout << GridLogMessage << "Converged ! "<<std::endl;
@ -96,25 +124,26 @@ class FourierAcceleratedGaugeFixer : public Gimpl {
}
}
};
static Real SteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) {
static Real SteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform,Real & alpha, GaugeMat & dmuAmu,int orthog) {
GridBase *grid = U[0]._grid;
std::vector<GaugeMat> A(Nd,grid);
GaugeMat g(grid);
GaugeLinkToLieAlgebraField(U,A);
ExpiAlphaDmuAmu(A,g,alpha,dmuAmu);
ExpiAlphaDmuAmu(A,g,alpha,dmuAmu,orthog);
Real vol = grid->gSites();
Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
xform = g*xform ;
SU<Nc>::GaugeTransform(U,g);
return trG;
}
static Real FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) {
static Real FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform,Real & alpha, GaugeMat & dmuAmu,int orthog) {
GridBase *grid = U[0]._grid;
@ -133,38 +162,41 @@ class FourierAcceleratedGaugeFixer : public Gimpl {
GaugeLinkToLieAlgebraField(U,A);
DmuAmu(A,dmuAmu);
DmuAmu(A,dmuAmu,orthog);
theFFT.FFT_all_dim(dmuAmu_p,dmuAmu,FFT::forward);
std::vector<int> mask(Nd,1);
for(int mu=0;mu<Nd;mu++) if (mu==orthog) mask[mu]=0;
theFFT.FFT_dim_mask(dmuAmu_p,dmuAmu,mask,FFT::forward);
//////////////////////////////////
// Work out Fp = psq_max/ psq...
// Avoid singularities in Fp
//////////////////////////////////
std::vector<int> latt_size = grid->GlobalDimensions();
std::vector<int> coor(grid->_ndimension,0);
for(int mu=0;mu<Nd;mu++) {
Real TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(pmu,mu);
pmu = TwoPiL * pmu ;
psq = psq + 4.0*sin(pmu*0.5)*sin(pmu*0.5);
if ( mu != orthog ) {
Real TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(pmu,mu);
pmu = TwoPiL * pmu ;
psq = psq + 4.0*sin(pmu*0.5)*sin(pmu*0.5);
}
}
Complex psqMax(16.0);
Fp = psqMax*one/psq;
/*
static int once;
if ( once == 0 ) {
std::cout << " Fp " << Fp <<std::endl;
once ++;
}*/
pokeSite(TComplex(1.0),Fp,coor);
pokeSite(TComplex(16.0),Fp,coor);
if( (orthog>=0) && (orthog<Nd) ){
for(int t=0;t<grid->GlobalDimensions()[orthog];t++){
coor[orthog]=t;
pokeSite(TComplex(16.0),Fp,coor);
}
}
dmuAmu_p = dmuAmu_p * Fp;
theFFT.FFT_all_dim(dmuAmu,dmuAmu_p,FFT::backward);
theFFT.FFT_dim_mask(dmuAmu,dmuAmu_p,mask,FFT::backward);
GaugeMat ciadmam(grid);
Complex cialpha(0.0,-alpha);
@ -173,16 +205,17 @@ class FourierAcceleratedGaugeFixer : public Gimpl {
Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
xform = g*xform ;
SU<Nc>::GaugeTransform(U,g);
return trG;
}
static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu) {
static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu,int orthog) {
GridBase *grid = g._grid;
Complex cialpha(0.0,-alpha);
GaugeMat ciadmam(grid);
DmuAmu(A,dmuAmu);
DmuAmu(A,dmuAmu,orthog);
ciadmam = dmuAmu*cialpha;
SU<Nc>::taExp(ciadmam,g);
}

View File

@ -173,6 +173,39 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
}
}
}
}
}}
// I explicitly need these outside the QCD namespace
// Generic version: z = Gamma5 * x.
// z takes over x's checkerboard; x and z must be conformable.
template<typename vobj>
void G5C(Lattice<vobj> &z, const Lattice<vobj> &x)
{
  z.checkerboard = x.checkerboard;
  conformable(x, z);

  QCD::Gamma G5(QCD::Gamma::Algebra::Gamma5);
  z = G5 * x;
}
// Overload for vectors of nbasis components per site:
// the first nbasis/2 components are copied unchanged, the remaining
// nbasis/2 components are copied with their sign flipped.
// z takes over x's checkerboard; x and z must be conformable.
template<class CComplex, int nbasis>
void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex, nbasis>> &x)
{
  static_assert(nbasis % 2 == 0, "");
  const int half = nbasis / 2;

  GridBase *grid = x._grid;
  z.checkerboard = x.checkerboard;
  conformable(x, z);

  parallel_for(int site = 0; site < grid->oSites(); site++) {
    for(int c = 0; c < nbasis; ++c) {
      if (c < half) {
        z._odata[site](c) = x._odata[site](c);
      } else {
        z._odata[site](c) = -x._odata[site](c);
      }
    }
  }
}
}
#endif

View File

@ -676,10 +676,18 @@ class SU {
}
}
/*
add GaugeTrans
*/
template<typename GaugeField,typename GaugeMat>
* Fundamental rep gauge xform
*/
// Left-multiplies ferm by the gauge rotation g, site by site (ferm <- g*ferm).
// Both fields must live on conformable grids.
template<typename Fundamental,typename GaugeMat>
static void GaugeTransformFundamental( Fundamental &ferm, GaugeMat &g){
  conformable(ferm._grid, g._grid);
  ferm = g*ferm;
}
/*
* Adjoint rep gauge xform
*/
template<typename GaugeField,typename GaugeMat>
static void GaugeTransform( GaugeField &Umu, GaugeMat &g){
GridBase *grid = Umu._grid;
conformable(grid,g._grid);

View File

@ -6,10 +6,12 @@
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: James Harrison <J.Harrison@soton.ac.uk>
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -645,6 +647,184 @@ static void StapleMult(GaugeMat &staple, const GaugeLorentz &Umu, int mu) {
}
}
}
//////////////////////////////////////////////////
// Wilson loop of size (Rmu, Rnu), oriented in mu,nu plane
//
// Link product built with covariant shifts in this order:
//   Rnu links forward in nu (U[nu] itself plus Rnu-1 shifts),
//   Rmu links forward in mu,
//   Rnu links backward in nu,
//   Rmu links backward in mu.
// NOTE(review): wl always starts as U[nu], so this presumably
// requires Rnu >= 1 - confirm with callers.
//////////////////////////////////////////////////
static void wilsonLoop(GaugeMat &wl, const std::vector<GaugeMat> &U,
                       const int Rmu, const int Rnu,
                       const int mu, const int nu) {
  const GaugeMat &linkMu = U[mu];
  const GaugeMat &linkNu = U[nu];

  wl = linkNu;
  for (int step = 1; step < Rnu; ++step) {
    wl = Gimpl::CovShiftForward(linkNu, nu, wl);
  }
  for (int step = 0; step < Rmu; ++step) {
    wl = Gimpl::CovShiftForward(linkMu, mu, wl);
  }
  for (int step = 0; step < Rnu; ++step) {
    wl = Gimpl::CovShiftBackward(linkNu, nu, wl);
  }
  for (int step = 0; step < Rmu; ++step) {
    wl = Gimpl::CovShiftBackward(linkMu, mu, wl);
  }
}
//////////////////////////////////////////////////
// Site-wise trace of the (Rmu, Rnu) Wilson loop in the mu,nu plane.
// The result is written to wl.
//////////////////////////////////////////////////
static void traceWilsonLoop(LatticeComplex &wl,
                            const std::vector<GaugeMat> &U,
                            const int Rmu, const int Rnu,
                            const int mu, const int nu) {
  GaugeMat loop(U[0]._grid);
  wilsonLoop(loop, U, Rmu, Rnu, mu, nu);
  wl = trace(loop);
}
//////////////////////////////////////////////////
// Site-wise sum of traced Wilson loops over all planes (nu < mu),
// taking both the (R1, R2) and the (R2, R1) extent in each plane.
//////////////////////////////////////////////////
static void siteWilsonLoop(LatticeComplex &Wl,
                           const std::vector<GaugeMat> &U,
                           const int R1, const int R2) {
  GridBase *grid = U[0]._grid;
  LatticeComplex planeTrace(grid);

  Wl = zero;
  for (int mu = 1; mu < grid->_ndimension; ++mu) {
    for (int nu = 0; nu < mu; ++nu) {
      // first orientation of the rectangle
      traceWilsonLoop(planeTrace, U, R1, R2, mu, nu);
      Wl = Wl + planeTrace;
      // same plane with the two extents swapped
      traceWilsonLoop(planeTrace, U, R2, R1, mu, nu);
      Wl = Wl + planeTrace;
    }
  }
}
//////////////////////////////////////////////////
// Site-wise sum of traced Wilson loops over the planes containing
// the last direction (ndim-1, taken as time): extent R1 along that
// direction and R2 along each other direction nu.
//////////////////////////////////////////////////
static void siteTimelikeWilsonLoop(LatticeComplex &Wl,
                                   const std::vector<GaugeMat> &U,
                                   const int R1, const int R2) {
  GridBase *grid = U[0]._grid;
  LatticeComplex planeTrace(grid);
  const int ndim = grid->_ndimension;
  const int tdir = ndim - 1;

  Wl = zero;
  for (int nu = 0; nu < tdir; ++nu) {
    traceWilsonLoop(planeTrace, U, R1, R2, tdir, nu);
    Wl = Wl + planeTrace;
  }
}
//////////////////////////////////////////////////
// Site-wise sum of traced Wilson loops over all planes that do not
// contain the last direction (ndim-1, taken as time), taking both
// the (R1, R2) and the (R2, R1) extent in each plane.
//////////////////////////////////////////////////
static void siteSpatialWilsonLoop(LatticeComplex &Wl,
                                  const std::vector<GaugeMat> &U,
                                  const int R1, const int R2) {
  GridBase *grid = U[0]._grid;
  LatticeComplex planeTrace(grid);

  Wl = zero;
  for (int mu = 1; mu < grid->_ndimension - 1; ++mu) {
    for (int nu = 0; nu < mu; ++nu) {
      // first orientation of the rectangle
      traceWilsonLoop(planeTrace, U, R1, R2, mu, nu);
      Wl = Wl + planeTrace;
      // same plane with the two extents swapped
      traceWilsonLoop(planeTrace, U, R2, R1, mu, nu);
      Wl = Wl + planeTrace;
    }
  }
}
//////////////////////////////////////////////////
// sum over all x,y,z,t and over all planes of Wilson loop
//
// Returns the real part of the lattice sum of siteWilsonLoop(R1, R2).
//////////////////////////////////////////////////
static Real sumWilsonLoop(const GaugeLorentz &Umu,
                          const int R1, const int R2) {
  // Size the link vector from the grid dimension so that it always
  // matches the bound of the fill loop below (a hard-coded 4 would be
  // overrun whenever _ndimension > 4).
  const int ndim = Umu._grid->_ndimension;
  std::vector<GaugeMat> U(ndim, Umu._grid);

  for (int mu = 0; mu < ndim; mu++) {
    U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
  }

  LatticeComplex Wl(Umu._grid);

  siteWilsonLoop(Wl, U, R1, R2);

  TComplex Tp = sum(Wl);
  Complex p = TensorRemove(Tp);
  return p.real();
}
//////////////////////////////////////////////////
// sum over all x,y,z,t and over all planes of timelike Wilson loop
//
// Returns the real part of the lattice sum of
// siteTimelikeWilsonLoop(R1, R2).
//////////////////////////////////////////////////
static Real sumTimelikeWilsonLoop(const GaugeLorentz &Umu,
                                  const int R1, const int R2) {
  // Size the link vector from the grid dimension so that it always
  // matches the bound of the fill loop below (a hard-coded 4 would be
  // overrun whenever _ndimension > 4).
  const int ndim = Umu._grid->_ndimension;
  std::vector<GaugeMat> U(ndim, Umu._grid);

  for (int mu = 0; mu < ndim; mu++) {
    U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
  }

  LatticeComplex Wl(Umu._grid);

  siteTimelikeWilsonLoop(Wl, U, R1, R2);

  TComplex Tp = sum(Wl);
  Complex p = TensorRemove(Tp);
  return p.real();
}
//////////////////////////////////////////////////
// sum over all x,y,z,t and over all planes of spatial Wilson loop
//
// Returns the real part of the lattice sum of
// siteSpatialWilsonLoop(R1, R2).
//////////////////////////////////////////////////
static Real sumSpatialWilsonLoop(const GaugeLorentz &Umu,
                                 const int R1, const int R2) {
  // Size the link vector from the grid dimension so that it always
  // matches the bound of the fill loop below (a hard-coded 4 would be
  // overrun whenever _ndimension > 4).
  const int ndim = Umu._grid->_ndimension;
  std::vector<GaugeMat> U(ndim, Umu._grid);

  for (int mu = 0; mu < ndim; mu++) {
    U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
  }

  LatticeComplex Wl(Umu._grid);

  siteSpatialWilsonLoop(Wl, U, R1, R2);

  TComplex Tp = sum(Wl);
  Complex p = TensorRemove(Tp);
  return p.real();
}
//////////////////////////////////////////////////
// Wilson loop averaged over all sites and all planes.
// Normalisation: global volume * ndim*(ndim-1) loop terms * Nc
// (ndim*(ndim-1)/2 planes, two orientations each).
//////////////////////////////////////////////////
static Real avgWilsonLoop(const GaugeLorentz &Umu,
                          const int R1, const int R2) {
  const int ndim = Umu._grid->_ndimension;
  const Real total = sumWilsonLoop(Umu, R1, R2);
  const Real volume = Umu._grid->gSites();
  const Real nTerms = 1.0 * ndim * (ndim - 1);

  return total / volume / nTerms / Nc; // Nc dependent... FIXME
}
//////////////////////////////////////////////////
// Timelike Wilson loop averaged over all sites and all timelike planes.
// Normalisation: global volume * (ndim-1) loop terms * Nc.
//////////////////////////////////////////////////
static Real avgTimelikeWilsonLoop(const GaugeLorentz &Umu,
                                  const int R1, const int R2) {
  const int ndim = Umu._grid->_ndimension;
  const Real total = sumTimelikeWilsonLoop(Umu, R1, R2);
  const Real volume = Umu._grid->gSites();
  const Real nTerms = 1.0 * (ndim - 1);

  return total / volume / nTerms / Nc; // Nc dependent... FIXME
}
//////////////////////////////////////////////////
// Spatial Wilson loop averaged over all sites and all spatial planes.
// Normalisation: global volume * (ndim-1)*(ndim-2) loop terms * Nc
// ((ndim-1)*(ndim-2)/2 planes, two orientations each).
//////////////////////////////////////////////////
static Real avgSpatialWilsonLoop(const GaugeLorentz &Umu,
                                 const int R1, const int R2) {
  const int ndim = Umu._grid->_ndimension;
  const Real total = sumSpatialWilsonLoop(Umu, R1, R2);
  const Real volume = Umu._grid->gSites();
  const Real nTerms = 1.0 * (ndim - 1) * (ndim - 2);

  return total / volume / nTerms / Nc; // Nc dependent... FIXME
}
};
typedef WilsonLoops<PeriodicGimplR> ColourWilsonLoops;

View File

@ -33,12 +33,76 @@ Author: Guido Cossu <guido.cossu@ed.ac.uk>
#include <type_traits>
#include <Grid/tensors/Tensors.h>
#include <Grid/serialisation/VectorUtils.h>
#include <Grid/Eigen/unsupported/CXX11/Tensor>
namespace Grid {
namespace EigenIO {
// Compile-time traits used by the serialisation layer to recognise Eigen
// tensors and to classify the element type ("Scalar") they hold.
//
// EigenIO works for scalars that are not just Grid supported scalars
// Primary template: a type is not complex unless a specialisation below matches.
template<typename T, typename V = void> struct is_complex : public std::false_type {};
// Support all complex types (not just Grid complex types) - even if the definitions overlap (!)
// First specialisation: anything ::Grid::is_complex already accepts.
template<typename T> struct is_complex< T , typename
std::enable_if< ::Grid::is_complex< T >::value>::type> : public std::true_type {};
// Second specialisation: any other std::complex<T>. It is enabled only when
// ::Grid::is_complex does NOT accept the type, so the two never both match.
template<typename T> struct is_complex<std::complex<T>, typename
std::enable_if<!::Grid::is_complex<std::complex<T>>::value>::type> : public std::true_type {};
// Helpers to support I/O for Eigen tensors of arithmetic scalars, complex types, or Grid tensors
// is_scalar: true for std::is_arithmetic types and for types accepted by is_complex above.
template<typename T, typename V = void> struct is_scalar : public std::false_type {};
template<typename T> struct is_scalar<T, typename std::enable_if<std::is_arithmetic<T>::value || is_complex<T>::value>::type> : public std::true_type {};
// Is this an Eigen tensor
// (i.e. does T derive from Eigen::TensorBase<T, Eigen::ReadOnlyAccessors>)
template<typename T> struct is_tensor : std::integral_constant<bool,
std::is_base_of<Eigen::TensorBase<T, Eigen::ReadOnlyAccessors>, T>::value> {};
// Is this an Eigen tensor of a supported scalar
// (is_tensor<T> and is_scalar<T::Scalar>)
template<typename T, typename V = void> struct is_tensor_of_scalar : public std::false_type {};
template<typename T> struct is_tensor_of_scalar<T, typename std::enable_if<is_tensor<T>::value && is_scalar<typename T::Scalar>::value>::type> : public std::true_type {};
// Is this an Eigen tensor of a supported container
// (is_tensor<T> and isGridTensor<T::Scalar>)
template<typename T, typename V = void> struct is_tensor_of_container : public std::false_type {};
template<typename T> struct is_tensor_of_container<T, typename std::enable_if<is_tensor<T>::value && isGridTensor<typename T::Scalar>::value>::type> : public std::true_type {};
// These traits describe the scalars inside Eigen tensors
// I wish I could define these in reference to the scalar type (so there would be fewer traits defined)
// but I'm unable to find a syntax to make this work
// Primary template is empty: Traits is only usable for the two tensor kinds below.
template<typename T, typename V = void> struct Traits {};
// Tensor of scalars: inherit the defaults from GridTypeMapper_Base; the
// underlying scalar is the tensor's own element type.
template<typename T> struct Traits<T, typename std::enable_if<is_tensor_of_scalar<T>::value>::type>
: public GridTypeMapper_Base {
using scalar_type = typename T::Scalar; // ultimate base scalar
static constexpr bool is_complex = ::Grid::EigenIO::is_complex<scalar_type>::value;
};
// Tensor of Grid tensors: forward scalar type, tensor level, rank, element
// count and per-index dimension from GridTypeMapper of the element type.
template<typename T> struct Traits<T, typename std::enable_if<is_tensor_of_container<T>::value>::type> {
using BaseTraits = GridTypeMapper<typename T::Scalar>;
using scalar_type = typename BaseTraits::scalar_type; // ultimate base scalar
static constexpr bool is_complex = ::Grid::EigenIO::is_complex<scalar_type>::value;
static constexpr int TensorLevel = BaseTraits::TensorLevel;
static constexpr int Rank = BaseTraits::Rank;
static constexpr std::size_t count = BaseTraits::count;
static constexpr int Dimension(int dim) { return BaseTraits::Dimension(dim); }
};
// Is this a fixed-size Eigen tensor
// True for Eigen::TensorFixedSize and for an Eigen::TensorMap over a TensorFixedSize.
template<typename T> struct is_tensor_fixed : public std::false_type {};
template<typename Scalar_, typename Dimensions_, int Options_, typename IndexType>
struct is_tensor_fixed<Eigen::TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType>>
: public std::true_type {};
template<typename Scalar_, typename Dimensions_, int Options_, typename IndexType,
int MapOptions_, template <class> class MapPointer_>
struct is_tensor_fixed<Eigen::TensorMap<Eigen::TensorFixedSize<Scalar_, Dimensions_,
Options_, IndexType>, MapOptions_, MapPointer_>>
: public std::true_type {};
// Is this a variable-size Eigen tensor
// (any Eigen tensor that is not matched by is_tensor_fixed)
template<typename T, typename V = void> struct is_tensor_variable : public std::false_type {};
template<typename T> struct is_tensor_variable<T, typename std::enable_if<is_tensor<T>::value
&& !is_tensor_fixed<T>::value>::type> : public std::true_type {};
}
// Abstract writer/reader classes ////////////////////////////////////////////
// static polymorphism implemented using CRTP idiom
class Serializable;
// Static abstract writer
template <typename T>
class Writer
@ -49,10 +113,10 @@ namespace Grid {
void push(const std::string &s);
void pop(void);
template <typename U>
typename std::enable_if<std::is_base_of<Serializable, U>::value, void>::type
typename std::enable_if<std::is_base_of<Serializable, U>::value>::type
write(const std::string& s, const U &output);
template <typename U>
typename std::enable_if<!std::is_base_of<Serializable, U>::value, void>::type
typename std::enable_if<!std::is_base_of<Serializable, U>::value && !EigenIO::is_tensor<U>::value>::type
write(const std::string& s, const U &output);
template <typename U>
void write(const std::string &s, const iScalar<U> &output);
@ -60,6 +124,42 @@ namespace Grid {
void write(const std::string &s, const iVector<U, N> &output);
template <typename U, int N>
void write(const std::string &s, const iMatrix<U, N> &output);
template <typename ETensor>
typename std::enable_if<EigenIO::is_tensor<ETensor>::value>::type
write(const std::string &s, const ETensor &output);
// Helper functions for Scalar vs Container specialisations
// Tensor of scalars: the pointer returned by the tensor's data() is already a
// pointer to the first scalar.
template <typename ETensor>
inline typename std::enable_if<EigenIO::is_tensor_of_scalar<ETensor>::value,
                               const typename ETensor::Scalar *>::type
getFirstScalar(const ETensor &output)
{
  const typename ETensor::Scalar * pFirst = output.data();
  return pFirst;
}
// Tensor of Grid containers: the first scalar is begin() of the first container.
template <typename ETensor>
inline typename std::enable_if<EigenIO::is_tensor_of_container<ETensor>::value,
                               const typename EigenIO::Traits<ETensor>::scalar_type *>::type
getFirstScalar(const ETensor &output)
{
  auto pFirstContainer = output.data();
  return pFirstContainer->begin();
}
// Scalar element: store it at the cursor and advance the cursor by one.
template <typename S>
inline typename std::enable_if<EigenIO::is_scalar<S>::value, void>::type
copyScalars(S * &pCopy, const S &Source)
{
  *pCopy = Source;
  ++pCopy;
}
// Grid container element: append each scalar yielded by iterating the
// container, advancing the cursor once per scalar.
template <typename S>
inline typename std::enable_if<isGridTensor<S>::value, void>::type
copyScalars(typename GridTypeMapper<S>::scalar_type * &pCopy, const S &Source)
{
  using BaseScalar = typename GridTypeMapper<S>::scalar_type;
  for( const BaseScalar &value : Source ) {
    *pCopy = value;
    ++pCopy;
  }
}
void scientificFormat(const bool set);
bool isScientific(void);
void setPrecision(const unsigned int prec);
@ -83,7 +183,8 @@ namespace Grid {
typename std::enable_if<std::is_base_of<Serializable, U>::value, void>::type
read(const std::string& s, U &output);
template <typename U>
typename std::enable_if<!std::is_base_of<Serializable, U>::value, void>::type
typename std::enable_if<!std::is_base_of<Serializable, U>::value
&& !EigenIO::is_tensor<U>::value, void>::type
read(const std::string& s, U &output);
template <typename U>
void read(const std::string &s, iScalar<U> &output);
@ -91,6 +192,32 @@ namespace Grid {
void read(const std::string &s, iVector<U, N> &output);
template <typename U, int N>
void read(const std::string &s, iMatrix<U, N> &output);
template <typename ETensor>
typename std::enable_if<EigenIO::is_tensor<ETensor>::value, void>::type
read(const std::string &s, ETensor &output);
template <typename ETensor>
typename std::enable_if<EigenIO::is_tensor_fixed<ETensor>::value, void>::type
Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims );
template <typename ETensor>
typename std::enable_if<EigenIO::is_tensor_variable<ETensor>::value, void>::type
Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims );
// Helper functions for Scalar vs Container specialisations
// Scalar element: take the value under the read cursor and advance the cursor.
template <typename S>
inline typename std::enable_if<EigenIO::is_scalar<S>::value, void>::type
copyScalars(S &Dest, const S * &pSource)
{
  Dest = *pSource;
  ++pSource;
}
// Grid container element: fill every scalar slot yielded by iterating the
// container from consecutive source values, advancing the read cursor each time.
template <typename S>
inline typename std::enable_if<isGridTensor<S>::value, void>::type
copyScalars(S &Dest, const typename GridTypeMapper<S>::scalar_type * &pSource)
{
  using BaseScalar = typename GridTypeMapper<S>::scalar_type;
  for( BaseScalar &slot : Dest ) {
    slot = *pSource;
    ++pSource;
  }
}
protected:
template <typename U>
void fromString(U &output, const std::string &s);
@ -135,12 +262,14 @@ namespace Grid {
template <typename T>
template <typename U>
typename std::enable_if<!std::is_base_of<Serializable, U>::value, void>::type
typename std::enable_if<!std::is_base_of<Serializable, U>::value
&& !EigenIO::is_tensor<U>::value, void>::type
Writer<T>::write(const std::string &s, const U &output)
{
upcast->writeDefault(s, output);
}
template <typename T>
template <typename U>
void Writer<T>::write(const std::string &s, const iScalar<U> &output)
@ -161,6 +290,57 @@ namespace Grid {
{
upcast->writeDefault(s, tensorToVec(output));
}
// Eigen::Tensors of supported scalars or of Grid tensors (iScalar, iVector, iMatrix)
//
// Flattens the tensor into a row-major buffer of the underlying scalar type
// and passes it to the concrete writer's writeMultiDim, together with the
// combined dimensions: the tensor's own dimensions first, followed by the
// dimensions of the container held in each element (if any).
//   s      : name under which the tensor is written
//   output : tensor to write; must contain at least one element (asserted)
template <typename T>
template <typename ETensor>
typename std::enable_if<EigenIO::is_tensor<ETensor>::value, void>::type
Writer<T>::write(const std::string &s, const ETensor &output)
{
  using Index = typename ETensor::Index;
  using Container = typename ETensor::Scalar; // NB: could be same as scalar
  using Traits = EigenIO::Traits<ETensor>;
  using Scalar = typename Traits::scalar_type; // type of the underlying scalar
  constexpr unsigned int TensorRank{ETensor::NumIndices};
  constexpr unsigned int ContainerRank{Traits::Rank}; // Only non-zero for containers
  constexpr unsigned int TotalRank{TensorRank + ContainerRank};
  const Index NumElements{output.size()};
  assert( NumElements > 0 );

  // Get the dimensionality of the tensor
  // (counters are unsigned to match the unsigned ranks they are compared with)
  std::vector<std::size_t> TotalDims(TotalRank);
  for( unsigned int i = 0; i < TensorRank; i++ ) {
    auto dim = output.dimension(i);
    TotalDims[i] = static_cast<size_t>(dim);
    assert( TotalDims[i] == dim ); // check we didn't lose anything in the conversion
  }
  for( unsigned int i = 0; i < ContainerRank; i++ )
    TotalDims[TensorRank + i] = Traits::Dimension(i);

  // If the Tensor isn't in Row-Major order, then we'll need to copy its data
  const bool CopyData{NumElements > 1 && ETensor::Layout != Eigen::StorageOptions::RowMajor};
  const Scalar * pWriteBuffer = nullptr;
  std::vector<Scalar> CopyBuffer;
  const Index TotalNumElements = NumElements * Traits::count;
  if( !CopyData ) {
    // Storage can be written as it stands
    pWriteBuffer = getFirstScalar( output );
  } else {
    // Regardless of the Eigen::Tensor storage order, the copy will be Row Major
    CopyBuffer.resize( TotalNumElements );
    Scalar * pCopy = &CopyBuffer[0];
    pWriteBuffer = pCopy;
    std::array<Index, TensorRank> MyIndex;
    for( auto &idx : MyIndex ) idx = 0;
    // Counter has the tensor's Index type so it cannot overflow before NumElements
    for( Index n = 0; n < NumElements; n++ ) {
      const Container & c = output( MyIndex );
      copyScalars( pCopy, c );
      // Now increment the index (last index varies fastest)
      for( int i = output.NumDimensions - 1; i >= 0 && ++MyIndex[i] == output.dimension(i); i-- )
        MyIndex[i] = 0;
    }
  }
  upcast->template writeMultiDim<Scalar>(s, TotalDims, pWriteBuffer, TotalNumElements);
}
template <typename T>
void Writer<T>::scientificFormat(const bool set)
@ -215,7 +395,8 @@ namespace Grid {
template <typename T>
template <typename U>
typename std::enable_if<!std::is_base_of<Serializable, U>::value, void>::type
typename std::enable_if<!std::is_base_of<Serializable, U>::value
&& !EigenIO::is_tensor<U>::value, void>::type
Reader<T>::read(const std::string &s, U &output)
{
upcast->readDefault(s, output);
@ -251,6 +432,79 @@ namespace Grid {
vecToTensor(output, v);
}
// Read an Eigen tensor of supported scalars or of Grid tensors.
//
// The concrete reader's readMultiDim supplies a flat buffer of underlying
// scalars plus the combined dimensions (tensor dimensions first, then the
// dimensions of the container held in each element). The rank, the container
// dimensions and the element count are checked with assert. If the tensor has
// no storage or its shape differs from the data, Reshape is called; the
// fixed-size overload of Reshape asserts.
//   s      : name of the object to read
//   output : tensor to fill
template <typename T>
template <typename ETensor>
typename std::enable_if<EigenIO::is_tensor<ETensor>::value, void>::type
Reader<T>::read(const std::string &s, ETensor &output)
{
  using Index = typename ETensor::Index;
  using Container = typename ETensor::Scalar; // NB: could be same as scalar
  using Traits = EigenIO::Traits<ETensor>;
  using Scalar = typename Traits::scalar_type; // type of the underlying scalar
  constexpr unsigned int TensorRank{ETensor::NumIndices};
  constexpr unsigned int ContainerRank{Traits::Rank}; // Only non-zero for containers
  constexpr unsigned int TotalRank{TensorRank + ContainerRank};
  using ETDims = std::array<Index, TensorRank>; // Dimensions of the tensor

  // read the (flat) data and dimensionality
  std::vector<std::size_t> dimData;
  std::vector<Scalar> buf;
  upcast->readMultiDim( s, buf, dimData );
  assert(dimData.size() == TotalRank && "EigenIO: Tensor rank mismatch" );
  // Make sure that the number of elements read matches dimensions read
  std::size_t NumContainers = 1;
  for( auto i = 0 ; i < TensorRank ; i++ )
    NumContainers *= dimData[i];
  // If our scalar object is a Container, make sure its dimensions match what we read back
  std::size_t ElementsPerContainer = 1;
  for( auto i = 0 ; i < ContainerRank ; i++ ) {
    assert( dimData[TensorRank+i] == Traits::Dimension(i) && "Tensor Container dimensions don't match data" );
    ElementsPerContainer *= dimData[TensorRank+i];
  }
  assert( NumContainers * ElementsPerContainer == buf.size() && "EigenIO: Number of elements != product of dimensions" );

  // Now see whether the tensor is the right shape, or can be made to be
  const auto & dims = output.dimensions();
  bool bShapeOK = (output.data() != nullptr);
  for( auto i = 0; bShapeOK && i < TensorRank ; i++ )
    if( dims[i] != dimData[i] )
      bShapeOK = false;
  // Make the tensor the same size as the data read
  ETDims MyIndex;
  if( !bShapeOK ) {
    for( auto i = 0 ; i < TensorRank ; i++ )
      MyIndex[i] = dimData[i];
    Reshape(output, MyIndex);
  }

  // Copy the data into the tensor
  for( auto &d : MyIndex ) d = 0;
  // data() rather than &buf[0]: valid even when the buffer is empty
  const Scalar * pSource = buf.data();
  for( std::size_t n = 0 ; n < NumContainers ; n++ ) {
    Container & c = output( MyIndex );
    copyScalars( c, pSource );
    // Now increment the index (last index varies fastest)
    for( int i = TensorRank - 1; i != -1 && ++MyIndex[i] == dims[i]; i-- )
      MyIndex[i] = 0;
  }
  // Every scalar read must have been consumed. Pointer arithmetic is used for
  // the one-past-the-end position because subscripting buf at its size is
  // out of range.
  assert( pSource == buf.data() + NumContainers * ElementsPerContainer );
}
// Reshape overload selected for fixed-size tensors (EigenIO::is_tensor_fixed).
// Their dimensions cannot be changed, so being asked to reshape one is
// reported through an always-failing assert; neither argument is used.
template <typename T>
template <typename ETensor>
typename std::enable_if<EigenIO::is_tensor_fixed<ETensor>::value, void>::type
Reader<T>::Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims )
{
assert( 0 && "EigenIO: Fixed tensor dimensions can't be changed" );
}
// Reshape overload selected for variable-size tensors
// (EigenIO::is_tensor_variable): resizes t to the requested dimensions.
template <typename T>
template <typename ETensor>
typename std::enable_if<EigenIO::is_tensor_variable<ETensor>::value, void>::type
Reader<T>::Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims )
{
// resize() is called here; the reshape() call is left commented out
//t.reshape( dims );
t.resize( dims );
}
template <typename T>
template <typename U>
void Reader<T>::fromString(U &output, const std::string &s)
@ -289,8 +543,70 @@ namespace Grid {
{
return os;
}
// Member comparison used when at least one operand is not an Eigen tensor:
// defers to the operands' own operator==.
template <typename T1, typename T2>
static inline typename std::enable_if<!EigenIO::is_tensor<T1>::value || !EigenIO::is_tensor<T2>::value, bool>::type
CompareMember(const T1 &lhs, const T2 &rhs) {
  const bool bEqual = lhs == rhs;
  return bEqual;
}
// Member comparison for two Eigen tensors. Rank and every dimension are
// compared first; only tensors of identical shape are compared element-wise.
template <typename T1, typename T2>
static inline typename std::enable_if<EigenIO::is_tensor<T1>::value && EigenIO::is_tensor<T2>::value, bool>::type
CompareMember(const T1 &lhs, const T2 &rhs) {
  // First check whether dimensions match (Eigen tensor library will assert if they don't match)
  if( T1::NumIndices != T2::NumIndices )
    return false;
  for( auto i = 0 ; i < T1::NumIndices ; i++ ) {
    if( !( lhs.dimension(i) == rhs.dimension(i) ) )
      return false;
  }
  // Same shape: reduce the element-wise comparison to a single bool
  Eigen::Tensor<bool, 0, T1::Options> bResult = (lhs == rhs).all();
  return bResult(0);
}
// Member comparison for std::vectors of Eigen tensors: equal when the sizes
// match and every pair of corresponding tensors compares equal.
template <typename T>
static inline typename std::enable_if<EigenIO::is_tensor<T>::value, bool>::type
CompareMember(const std::vector<T> &lhs, const std::vector<T> &rhs) {
  if( lhs.size() != rhs.size() )
    return false;
  // Stop at the first mismatching pair
  for( std::size_t idx = 0 ; idx < lhs.size() ; ++idx ) {
    if( !CompareMember(lhs[idx], rhs[idx]) )
      return false;
  }
  return true;
}
// Stream a member that is not an Eigen tensor using its own operator<<.
template <typename T>
static inline typename std::enable_if<!EigenIO::is_tensor<T>::value, void>::type
WriteMember(std::ostream &os, const T &object) {
os << object;
}
// Stream an Eigen tensor member as "T<d0,d1,...>{e0,e1,...}": the dimensions
// followed by the elements in the order they are laid out behind data().
// The tensor must contain at least one element (asserted).
template <typename T>
static inline typename std::enable_if<EigenIO::is_tensor<T>::value, void>::type
WriteMember(std::ostream &os, const T &object) {
  using Index = typename T::Index;
  const Index NumElements{object.size()};
  assert( NumElements > 0 );

  // Dimension list; count accumulates the product of the dimensions
  os << "T<";
  Index count = 1;
  for( int i = 0; i < T::NumIndices; i++ ) {
    const Index dim = object.dimension(i);
    count *= dim;
    if( i != 0 )
      os << ",";
    os << dim;
  }
  assert( count == NumElements && "Number of elements doesn't match tensor dimensions" );

  // Element list
  os << ">{";
  const typename T::Scalar * const pData = object.data();
  for( Index n = 0; n < count; n++ ) {
    if( n != 0 )
      os << ",";
    os << pData[n];
  }
  os << "}";
}
};
// Generic writer interface //////////////////////////////////////////////////
template <typename T>
inline void push(Writer<T> &w, const std::string &s) {

View File

@ -51,6 +51,8 @@ namespace Grid {
template <typename U>
void writeDefault(const std::string &s, const std::vector<U> &x);
void writeDefault(const std::string &s, const char *x);
template <typename U>
void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
private:
std::ofstream file_;
};
@ -66,6 +68,8 @@ namespace Grid {
void readDefault(const std::string &s, U &output);
template <typename U>
void readDefault(const std::string &s, std::vector<U> &output);
template <typename U>
void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
private:
std::ifstream file_;
};
@ -92,6 +96,27 @@ namespace Grid {
}
}
// Write a multi-dimensional array held as a flat row-major buffer.
// Layout written: total element count, number of dimensions, each dimension,
// then every element in buffer order (all as unnamed fields).
//   s             : not used by this writer
//   Dimensions    : size of each dimension; their product must equal NumElements (asserted)
//   pDataRowMajor : first of NumElements consecutive elements
//   NumElements   : number of elements to write
template <typename U>
void BinaryWriter::writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements)
{
  uint64_t rank = static_cast<uint64_t>( Dimensions.size() );
  uint64_t tmp = 1;
  // Counters use the same unsigned 64-bit type as the bounds they are compared with
  for( uint64_t i = 0 ; i < rank ; i++ )
    tmp *= Dimensions[i];
  assert( tmp == NumElements && "Dimensions don't match size of data being written" );
  // Total number of elements
  write("", tmp);
  // Number of dimensions
  write("", rank);
  // Followed by each dimension
  for( uint64_t i = 0 ; i < rank ; i++ ) {
    tmp = Dimensions[i];
    write("", tmp);
  }
  // An int counter would overflow for buffers of more than INT_MAX elements
  for( size_t i = 0; i < NumElements; ++i)
    write("", pDataRowMajor[i]);
}
// Reader template implementation ////////////////////////////////////////////
template <typename U>
void BinaryReader::readDefault(const std::string &s, U &output)
@ -114,6 +139,30 @@ namespace Grid {
read("", output[i]);
}
}
// Read a multi-dimensional array into a flat buffer.
// Layout read: total element count, number of dimensions, each dimension,
// then every element (all as unnamed fields).
//   s   : not used by this reader
//   buf : resized to the product of the dimensions and filled in file order
//   dim : resized to the number of dimensions and filled with each dimension
// The stored element count must equal the product of the dimensions (asserted).
template <typename U>
void BinaryReader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim)
{
  // Number of elements
  uint64_t NumElements;
  read("", NumElements);
  // Number of dimensions
  uint64_t rank;
  read("", rank);
  // Followed by each dimension
  uint64_t count = 1;
  dim.resize(rank);
  uint64_t tmp;
  // Counters use the same unsigned 64-bit type as the bounds they are compared with
  for( uint64_t i = 0 ; i < rank ; i++ ) {
    read("", tmp);
    dim[i] = tmp;
    count *= tmp;
  }
  assert( count == NumElements && "Dimensions don't match size of data being read" );
  buf.resize(count);
  // An int counter would overflow for buffers of more than INT_MAX elements
  for( uint64_t i = 0; i < count; ++i)
    read("", buf[i]);
}
}
#endif

View File

@ -61,9 +61,9 @@ Group & Hdf5Writer::getGroup(void)
}
// Reader implementation ///////////////////////////////////////////////////////
Hdf5Reader::Hdf5Reader(const std::string &fileName)
Hdf5Reader::Hdf5Reader(const std::string &fileName, const bool readOnly)
: fileName_(fileName)
, file_(fileName.c_str(), H5F_ACC_RDWR)
, file_(fileName.c_str(), readOnly ? H5F_ACC_RDONLY : H5F_ACC_RDWR)
{
group_ = file_.openGroup("/");
readSingleAttribute(dataSetThres_, HDF5_GRID_GUARD "dataset_threshold",

View File

@ -3,6 +3,7 @@
#include <stack>
#include <string>
#include <list>
#include <vector>
#include <H5Cpp.h>
#include <Grid/tensors/Tensors.h>
@ -38,6 +39,8 @@ namespace Grid
template <typename U>
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
writeDefault(const std::string &s, const std::vector<U> &x);
template <typename U>
void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
H5NS::Group & getGroup(void);
private:
template <typename U>
@ -48,13 +51,13 @@ namespace Grid
std::vector<std::string> path_;
H5NS::H5File file_;
H5NS::Group group_;
unsigned int dataSetThres_{HDF5_DEF_DATASET_THRES};
const unsigned int dataSetThres_{HDF5_DEF_DATASET_THRES};
};
class Hdf5Reader: public Reader<Hdf5Reader>
{
public:
Hdf5Reader(const std::string &fileName);
Hdf5Reader(const std::string &fileName, const bool readOnly = true);
virtual ~Hdf5Reader(void) = default;
bool push(const std::string &s);
void pop(void);
@ -66,6 +69,8 @@ namespace Grid
template <typename U>
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
readDefault(const std::string &s, std::vector<U> &x);
template <typename U>
void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
H5NS::Group & getGroup(void);
private:
template <typename U>
@ -101,6 +106,75 @@ namespace Grid
template <>
void Hdf5Writer::writeDefault(const std::string &s, const std::string &x);
template <typename U>
void Hdf5Writer::writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements)
{
// Hdf5 needs the dimensions as hsize_t
const int rank = static_cast<int>(Dimensions.size());
std::vector<hsize_t> dim(rank);
for(int i = 0; i < rank; i++)
dim[i] = Dimensions[i];
// write the entire dataset to file
H5NS::DataSpace dataSpace(rank, dim.data());
if (NumElements > dataSetThres_)
{
// Make sure 1) each dimension; and 2) chunk size is < 4GB
const hsize_t MaxElements = ( sizeof( U ) == 1 ) ? 0xffffffff : 0x100000000 / sizeof( U );
hsize_t ElementsPerChunk = 1;
bool bTooBig = false;
for( int i = rank - 1 ; i != -1 ; i-- ) {
auto &d = dim[i];
if( bTooBig )
d = 1; // Chunk size is already as big as can be - remaining dimensions = 1
else {
// If individual dimension too big, reduce by prime factors if possible
while( d > MaxElements && ( d & 1 ) == 0 )
d >>= 1;
const char ErrorMsg[] = " dimension > 4GB and not divisible by 2^n. "
"Hdf5IO chunk size will be inefficient. NB Serialisation is not intended for large datasets - please consider alternatives.";
if( d > MaxElements ) {
std::cout << GridLogWarning << "Individual" << ErrorMsg << std::endl;
hsize_t quotient = d / MaxElements;
if( d % MaxElements )
quotient++;
d /= quotient;
}
// Now make sure overall size is not too big
hsize_t OverflowCheck = ElementsPerChunk;
ElementsPerChunk *= d;
assert( OverflowCheck == ElementsPerChunk / d && "Product of dimensions overflowed hsize_t" );
// If product of dimensions too big, reduce by prime factors
while( ElementsPerChunk > MaxElements && ( ElementsPerChunk & 1 ) == 0 ) {
bTooBig = true;
d >>= 1;
ElementsPerChunk >>= 1;
}
if( ElementsPerChunk > MaxElements ) {
std::cout << GridLogWarning << "Product of" << ErrorMsg << std::endl;
hsize_t quotient = ElementsPerChunk / MaxElements;
if( ElementsPerChunk % MaxElements )
quotient++;
d /= quotient;
ElementsPerChunk /= quotient;
}
}
}
H5NS::DataSet dataSet;
H5NS::DSetCreatPropList plist;
plist.setChunk(rank, dim.data());
plist.setFletcher32();
dataSet = group_.createDataSet(s, Hdf5Type<U>::type(), dataSpace, plist);
dataSet.write(pDataRowMajor, Hdf5Type<U>::type());
}
else
{
H5NS::Attribute attribute;
attribute = group_.createAttribute(s, Hdf5Type<U>::type(), dataSpace);
attribute.write(Hdf5Type<U>::type(), pDataRowMajor);
}
}
template <typename U>
typename std::enable_if<element<std::vector<U>>::is_number, void>::type
Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x)
@ -110,31 +184,11 @@ namespace Grid
// flatten the vector and getting dimensions
Flatten<std::vector<U>> flat(x);
std::vector<hsize_t> dim;
std::vector<size_t> dim;
const auto &flatx = flat.getFlatVector();
for (auto &d: flat.getDim())
{
dim.push_back(d);
}
// write to file
H5NS::DataSpace dataSpace(dim.size(), dim.data());
if (flatx.size() > dataSetThres_)
{
H5NS::DataSet dataSet;
dataSet = group_.createDataSet(s, Hdf5Type<Element>::type(), dataSpace);
dataSet.write(flatx.data(), Hdf5Type<Element>::type());
}
else
{
H5NS::Attribute attribute;
attribute = group_.createAttribute(s, Hdf5Type<Element>::type(), dataSpace);
attribute.write(Hdf5Type<Element>::type(), flatx.data());
}
writeMultiDim<Element>(s, dim, &flatx[0], flatx.size());
}
template <typename U>
@ -170,10 +224,9 @@ namespace Grid
template <>
void Hdf5Reader::readDefault(const std::string &s, std::string &x);
template <typename U>
typename std::enable_if<element<std::vector<U>>::is_number, void>::type
Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x)
void Hdf5Reader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim)
{
// alias to element type
typedef typename element<std::vector<U>>::type Element;
@ -181,7 +234,6 @@ namespace Grid
// read the dimensions
H5NS::DataSpace dataSpace;
std::vector<hsize_t> hdim;
std::vector<size_t> dim;
hsize_t size = 1;
if (group_.attrExists(s))
@ -201,8 +253,8 @@ namespace Grid
}
// read the flat vector
std::vector<Element> buf(size);
buf.resize(size);
if (size > dataSetThres_)
{
H5NS::DataSet dataSet;
@ -217,7 +269,19 @@ namespace Grid
attribute = group_.openAttribute(s);
attribute.read(Hdf5Type<Element>::type(), buf.data());
}
}
template <typename U>
typename std::enable_if<element<std::vector<U>>::is_number, void>::type
Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x)
{
// alias to element type
typedef typename element<std::vector<U>>::type Element;
std::vector<size_t> dim;
std::vector<Element> buf;
readMultiDim( s, buf, dim );
// reconstruct the multidimensional vector
Reconstruct<std::vector<U>> r(buf, dim);

View File

@ -109,8 +109,8 @@ THE SOFTWARE.
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define GRID_MACRO_MEMBER(A,B) A B;
#define GRID_MACRO_COMP_MEMBER(A,B) result = (result and (lhs. B == rhs. B));
#define GRID_MACRO_OS_WRITE_MEMBER(A,B) os<< #A <<" " #B << " = " << obj. B << " ; " <<std::endl;
#define GRID_MACRO_COMP_MEMBER(A,B) result = (result and CompareMember(lhs. B, rhs. B));
#define GRID_MACRO_OS_WRITE_MEMBER(A,B) os<< #A <<" " #B << " = "; WriteMember( os, obj. B ); os << " ; " <<std::endl;
#define GRID_MACRO_READ_MEMBER(A,B) Grid::read(RD,#B,obj. B);
#define GRID_MACRO_WRITE_MEMBER(A,B) Grid::write(WR,#B,obj. B);

View File

@ -51,6 +51,8 @@ namespace Grid
void writeDefault(const std::string &s, const U &x);
template <typename U>
void writeDefault(const std::string &s, const std::vector<U> &x);
template <typename U>
void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
private:
void indent(void);
private:
@ -69,6 +71,8 @@ namespace Grid
void readDefault(const std::string &s, U &output);
template <typename U>
void readDefault(const std::string &s, std::vector<U> &output);
template <typename U>
void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
private:
void checkIndent(void);
private:
@ -95,7 +99,18 @@ namespace Grid
write(s, x[i]);
}
}
// Write a multi-dimensional array held as a flat row-major buffer:
// the number of dimensions, each dimension, then every element in buffer
// order, each through write() under the name s.
template <typename U>
void TextWriter::writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements)
{
  uint64_t Rank = Dimensions.size();
  write(s, Rank);
  // Dimensions are written as 64-bit unsigned values
  for( std::size_t i = 0 ; i < Dimensions.size() ; ++i ) {
    uint64_t extent = Dimensions[i];
    write(s, extent);
  }
  // Elements, in buffer order
  const U * const pEnd = pDataRowMajor + NumElements;
  for( const U * pElem = pDataRowMajor ; pElem != pEnd ; ++pElem )
    write(s, *pElem);
}
// Reader template implementation ////////////////////////////////////////////
template <typename U>
void TextReader::readDefault(const std::string &s, U &output)
@ -121,6 +136,23 @@ namespace Grid
read("", output[i]);
}
}
// Read a multi-dimensional array into a flat buffer: the number of
// dimensions, each dimension (both read with an empty name), then as many
// elements as the product of the dimensions (read under the name s).
//   buf : resized to the product of the dimensions and filled in file order
//   dim : resized to the number of dimensions and filled with each dimension
template <typename U>
void TextReader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim)
{
  const char unnamed[] = "";
  uint64_t Rank;
  read(unnamed, Rank);
  dim.resize( Rank );

  // Dimensions; their product is the number of elements that follow
  size_t NumElements = 1;
  for( std::size_t i = 0 ; i < dim.size() ; ++i ) {
    read(unnamed, dim[i]);
    NumElements *= dim[i];
  }

  buf.resize( NumElements );
  for( std::size_t i = 0 ; i < buf.size() ; ++i )
    read(s, buf[i]);
}
}
#endif

View File

@ -1,3 +1,32 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./Grid/serialisation/VectorUtils.h
Copyright (C) 2015
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_SERIALISATION_VECTORUTILS_H
#define GRID_SERIALISATION_VECTORUTILS_H
@ -53,6 +82,17 @@ namespace Grid {
return os;
}
// std::vector<std::vector<...>> nested to specified Rank //////////////////////////////////
// NestedStdVector<T, Rank>::type is T wrapped in Rank levels of std::vector.
template<typename T, unsigned int Rank>
struct NestedStdVector {
  // Recursive case: one more std::vector around the (Rank - 1) nesting
  using type = std::vector<typename NestedStdVector<T, Rank - 1>::type>;
};

// Base case: rank 0 is the bare element type
template<typename T>
struct NestedStdVector<T,0> {
  using type = T;
};
// Grid scalar tensors to nested std::vectors //////////////////////////////////
template <typename T>
struct TensorToVec
@ -436,4 +476,4 @@ std::string vecToStr(const std::vector<T> &v)
return sstr.str();
}
#endif
#endif

View File

@ -57,6 +57,8 @@ namespace Grid
void writeDefault(const std::string &s, const U &x);
template <typename U>
void writeDefault(const std::string &s, const std::vector<U> &x);
template <typename U>
void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
std::string docString(void);
std::string string(void);
private:
@ -79,6 +81,8 @@ namespace Grid
void readDefault(const std::string &s, U &output);
template <typename U>
void readDefault(const std::string &s, std::vector<U> &output);
template <typename U>
void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
void readCurrentSubtree(std::string &s);
private:
void checkParse(const pugi::xml_parse_result &result, const std::string name);
@ -122,13 +126,45 @@ namespace Grid
void XmlWriter::writeDefault(const std::string &s, const std::vector<U> &x)
{
push(s);
for (auto &x_i: x)
for( auto &u : x )
{
write("elem", x_i);
write("elem", u);
}
pop();
}
// Write a multi-dimensional array held as a flat row-major buffer.
// Under node s: a "rank" entry, one "dim" entry per dimension, then the
// elements as "elem" entries wrapped in one nested "tensor" node per
// dimension. Whenever an index wraps round, the matching "tensor" nodes are
// closed and, if elements remain, reopened.
// The product of Dimensions must equal NumElements (asserted).
template <typename U>
void XmlWriter::writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements)
{
  static const char tensorTag[] = "tensor";
  const int Rank = static_cast<int>( Dimensions.size() );

  // Header: rank and dimensions
  push(s);
  write("rank", Rank );
  size_t count = 1;
  for( auto extent : Dimensions ) {
    write("dim", extent);
    count *= extent;
  }
  assert( count == NumElements && "XmlIO : element count doesn't match dimensions" );

  // Open one nested node per dimension, with the running index at the origin
  std::vector<size_t> position( Rank, 0 );
  for( int level = 0 ; level < Rank ; ++level )
    push(tensorTag);

  for( size_t remaining = NumElements ; remaining != 0 ; ) {
    --remaining;
    write("elem", *pDataRowMajor);
    ++pDataRowMajor;
    // Advance the index, last dimension fastest, noting how many indices wrapped
    int axis = Rank - 1;
    while( axis != -1 && ++position[axis] == Dimensions[axis] ) {
      position[axis] = 0;
      --axis;
    }
    const int wrapped = Rank - 1 - axis;
    // Close a node for every index that wrapped ...
    for( int k = 0 ; k < wrapped ; ++k )
      pop();
    // ... and reopen them unless that was the final element
    if( remaining != 0 ) {
      for( int k = 0 ; k < wrapped ; ++k )
        push(tensorTag);
    }
  }
  pop();
}
// Reader template implementation ////////////////////////////////////////////
template <typename U>
void XmlReader::readDefault(const std::string &s, U &output)
@ -145,25 +181,66 @@ namespace Grid
template <typename U>
void XmlReader::readDefault(const std::string &s, std::vector<U> &output)
{
std::string buf;
unsigned int i = 0;
if (!push(s))
{
std::cout << GridLogWarning << "XML: cannot open node '" << s << "'";
std::cout << std::endl;
return;
} else {
for(unsigned int i = 0; node_.child("elem"); )
{
output.resize(i + 1);
read("elem", output[i++]);
node_.child("elem").set_name("elem-done");
}
pop();
}
}
template <typename U>
void XmlReader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim)
{
if (!push(s))
{
std::cout << GridLogWarning << "XML: cannot open node '" << s << "'";
std::cout << std::endl;
} else {
static const char sName[] = "tensor";
static const char sNameDone[] = "tensor-done";
int Rank;
read("rank", Rank);
dim.resize( Rank );
size_t NumElements = 1;
for( auto &d : dim )
{
read("dim", d);
node_.child("dim").set_name("dim-done");
NumElements *= d;
}
buf.resize( NumElements );
std::vector<size_t> MyIndex( Rank );
for( int i = 0 ; i < Rank ; i++ ) {
MyIndex[i] = 0;
push(sName);
}
for( auto &x : buf )
{
NumElements--;
read("elem", x);
node_.child("elem").set_name("elem-done");
int i;
for( i = Rank - 1 ; i != -1 && ++MyIndex[i] == dim[i] ; i-- )
MyIndex[i] = 0;
int Rollover = Rank - 1 - i;
for( i = 0 ; i < Rollover ; i++ ) {
node_.set_name(sNameDone);
pop();
}
for( i = 0 ; NumElements && i < Rollover ; i++ )
push(sName);
}
pop();
}
while (node_.child("elem"))
{
output.resize(i + 1);
read("elem", output[i]);
node_.child("elem").set_name("elem-done");
i++;
}
pop();
}
}
#endif

View File

@ -485,83 +485,6 @@ namespace Optimization {
// Some Template specialization
// Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases
#ifndef __INTEL_COMPILER
#warning "Slow reduction due to incomplete reduce intrinsics"
//Complex float Reduce
// Horizontal sum of the single-precision complex values packed in a 512-bit
// register (16 floats, i.e. 8 real/imaginary pairs). Three permute-and-add
// stages fold the register onto itself; the total is then read from floats
// 0 (real part) and 1 (imaginary part) through the u512f union.
// NOTE(review): the lane exchange performed by Permute0/1/2 is defined
// elsewhere; presumably each stage swaps successively smaller sub-blocks - confirm.
template<>
inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){
__m512 v1,v2;
v1=Optimization::Permute::Permute0(in); // avx 512; octo complex single
v1= _mm512_add_ps(v1,in);
v2=Optimization::Permute::Permute1(v1);
v1 = _mm512_add_ps(v1,v2);
v2=Optimization::Permute::Permute2(v1);
v1 = _mm512_add_ps(v1,v2);
u512f conv; conv.v = v1;
return Grid::ComplexF(conv.f[0],conv.f[1]);
}
//Real float Reduce
// Horizontal sum of the 16 floats held in a 512-bit register. Four
// permute-and-add stages fold the register onto itself; the total is read
// from float 0 through the u512f union.
// NOTE(review): the lane exchange performed by Permute0..3 is defined
// elsewhere; presumably each stage swaps successively smaller sub-blocks - confirm.
template<>
inline Grid::RealF Reduce<Grid::RealF, __m512>::operator()(__m512 in){
__m512 v1,v2;
v1 = Optimization::Permute::Permute0(in); // avx 512; 16 single-precision reals
v1 = _mm512_add_ps(v1,in);
v2 = Optimization::Permute::Permute1(v1);
v1 = _mm512_add_ps(v1,v2);
v2 = Optimization::Permute::Permute2(v1);
v1 = _mm512_add_ps(v1,v2);
v2 = Optimization::Permute::Permute3(v1);
v1 = _mm512_add_ps(v1,v2);
u512f conv; conv.v=v1;
return conv.f[0];
}
//Complex double Reduce
// Horizontal sum of the double-precision complex values packed in a 512-bit
// register (8 doubles, i.e. 4 real/imaginary pairs). Two permute-and-add
// stages fold the register onto itself; the total is then read from doubles
// 0 (real part) and 1 (imaginary part) through the u512d union.
// NOTE(review): the lane exchange performed by Permute0/1 is defined
// elsewhere; presumably each stage swaps successively smaller sub-blocks - confirm.
template<>
inline Grid::ComplexD Reduce<Grid::ComplexD, __m512d>::operator()(__m512d in){
  __m512d v1,v2;
  v1 = Optimization::Permute::Permute0(in); // avx 512; quad complex double
  v1 = _mm512_add_pd(v1,in);
  // The second stage has to permute the partial sum, not the raw input:
  // permuting `in` again and overwriting v1 would throw away the first stage
  // and return the sum of only part of the register.
  v2 = Optimization::Permute::Permute1(v1);
  v1 = _mm512_add_pd(v1,v2);
  u512d conv; conv.v = v1;
  return Grid::ComplexD(conv.f[0],conv.f[1]);
}
//Real double Reduce
// Horizontal sum of the 8 doubles held in a 512-bit register. Three
// permute-and-add stages fold the register onto itself; the total is read
// from double 0 through the u512d union.
// NOTE(review): the lane exchange performed by Permute0/1/2 is defined
// elsewhere; presumably each stage swaps successively smaller sub-blocks - confirm.
template<>
inline Grid::RealD Reduce<Grid::RealD, __m512d>::operator()(__m512d in){
__m512d v1,v2;
v1 = Optimization::Permute::Permute0(in); // avx 512; octo double
v1 = _mm512_add_pd(v1,in);
v2 = Optimization::Permute::Permute1(v1);
v1 = _mm512_add_pd(v1,v2);
v2 = Optimization::Permute::Permute2(v1);
v1 = _mm512_add_pd(v1,v2);
u512d conv; conv.v = v1;
return conv.f[0];
}
//Integer Reduce
// Horizontal sum of the sixteen 32-bit integers held in a 512-bit register.
// The register is split into two 256-bit halves which are added, two
// horizontal adds then leave each 128-bit lane holding its own total, and the
// two lanes are finally added so that element 0 carries the full sum.
template<>
inline Integer Reduce<Integer, __m512i>::operator()(__m512i in){
// No full vector reduce, use AVX to add upper and lower halves of register
// and perform AVX reduction.
__m256i v1, v2, v3;
__m128i u1, u2, ret;
v1 = _mm512_castsi512_si256(in); // lower half (bits 0..255)
v2 = _mm512_extracti32x8_epi32(in, 1); // upper half (bits 256..511)
v3 = _mm256_add_epi32(v1, v2);
v1 = _mm256_hadd_epi32(v3, v3); // pairwise sums within each 128-bit lane
v2 = _mm256_hadd_epi32(v1, v1); // each 128-bit lane now holds its own total
u1 = _mm256_castsi256_si128(v2); // lower half (bits 0..127)
u2 = _mm256_extracti128_si256(v2, 1); // upper half (bits 128..255)
ret = _mm_add_epi32(u1, u2);
return _mm_cvtsi128_si32(ret); // element 0 = sum of all sixteen inputs
}
#else
//Complex float Reduce
template<>
inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){
@ -590,8 +513,6 @@ namespace Optimization {
inline Integer Reduce<Integer, __m512i>::operator()(__m512i in){
return _mm512_reduce_add_epi32(in);
}
#endif
}

View File

@ -10,6 +10,7 @@ Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Guido Cossu <cossu@iroiro-pc.kek.jp>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
Author: Michael Marshall <michael.marshall@ed.ac.au>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -89,17 +90,25 @@ template <typename Condition, typename ReturnType> using NotEnableIf = Invoke<st
////////////////////////////////////////////////////////
// Check for complexity with type traits
template <typename T> struct is_complex : public std::false_type {};
template <> struct is_complex<std::complex<double> > : public std::true_type {};
template <> struct is_complex<std::complex<float> > : public std::true_type {};
template <> struct is_complex<ComplexD> : public std::true_type {};
template <> struct is_complex<ComplexF> : public std::true_type {};
template <typename T> using IfReal = Invoke<std::enable_if<std::is_floating_point<T>::value, int> >;
template<typename T, typename V=void> struct is_real : public std::false_type {};
template<typename T> struct is_real<T, typename std::enable_if<std::is_floating_point<T>::value,
void>::type> : public std::true_type {};
template<typename T, typename V=void> struct is_integer : public std::false_type {};
template<typename T> struct is_integer<T, typename std::enable_if<std::is_integral<T>::value,
void>::type> : public std::true_type {};
template <typename T> using IfReal = Invoke<std::enable_if<is_real<T>::value, int> >;
template <typename T> using IfComplex = Invoke<std::enable_if<is_complex<T>::value, int> >;
template <typename T> using IfInteger = Invoke<std::enable_if<std::is_integral<T>::value, int> >;
template <typename T> using IfInteger = Invoke<std::enable_if<is_integer<T>::value, int> >;
template <typename T1,typename T2> using IfSame = Invoke<std::enable_if<std::is_same<T1,T2>::value, int> >;
template <typename T> using IfNotReal = Invoke<std::enable_if<!std::is_floating_point<T>::value, int> >;
template <typename T> using IfNotReal = Invoke<std::enable_if<!is_real<T>::value, int> >;
template <typename T> using IfNotComplex = Invoke<std::enable_if<!is_complex<T>::value, int> >;
template <typename T> using IfNotInteger = Invoke<std::enable_if<!std::is_integral<T>::value, int> >;
template <typename T> using IfNotInteger = Invoke<std::enable_if<!is_integer<T>::value, int> >;
template <typename T1,typename T2> using IfNotSame = Invoke<std::enable_if<!std::is_same<T1,T2>::value, int> >;
////////////////////////////////////////////////////////
@ -857,8 +866,10 @@ template <typename T>
struct is_simd : public std::false_type {};
template <> struct is_simd<vRealF> : public std::true_type {};
template <> struct is_simd<vRealD> : public std::true_type {};
template <> struct is_simd<vRealH> : public std::true_type {};
template <> struct is_simd<vComplexF> : public std::true_type {};
template <> struct is_simd<vComplexD> : public std::true_type {};
template <> struct is_simd<vComplexH> : public std::true_type {};
template <> struct is_simd<vInteger> : public std::true_type {};
template <typename T> using IfSimd = Invoke<std::enable_if<is_simd<T>::value, int> >;

View File

@ -5,6 +5,7 @@ Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Michael Marshall <michael.marshall@ed.ac.au>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -42,27 +43,26 @@ namespace Grid {
//
class GridTensorBase {};
// Too late to remove these traits from Grid Tensors, so inherit from GridTypeMapper
// Expands, inside a tensor class body, to the member aliases that the class
// re-exports from its traits type. The expansion site must already have
//   - `vtype`  : the element type of the tensor, and
//   - `Traits` : a type providing scalar_type, vector_type, vector_typeD,
//                tensor_reduced, scalar_object, Complexified, Realified,
//                DoublePrecision and TensorLevel
// in scope. The last line carries no semicolon, so the macro is used as
// `GridVector_CopyTraits;`.
#define GridVector_CopyTraits \
using element = vtype; \
using scalar_type = typename Traits::scalar_type; \
using vector_type = typename Traits::vector_type; \
using vector_typeD = typename Traits::vector_typeD; \
using tensor_reduced = typename Traits::tensor_reduced; \
using scalar_object = typename Traits::scalar_object; \
using Complexified = typename Traits::Complexified; \
using Realified = typename Traits::Realified; \
using DoublePrecision = typename Traits::DoublePrecision; \
static constexpr int TensorLevel = Traits::TensorLevel
template <class vtype>
class iScalar {
public:
vtype _internal;
typedef vtype element;
typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
typedef typename GridTypeMapper<vtype>::vector_type vector_type;
typedef typename GridTypeMapper<vtype>::vector_typeD vector_typeD;
typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object;
typedef iScalar<tensor_reduced_v> tensor_reduced;
typedef iScalar<recurse_scalar_object> scalar_object;
// substitutes a real or complex version with same tensor structure
typedef iScalar<typename GridTypeMapper<vtype>::Complexified> Complexified;
typedef iScalar<typename GridTypeMapper<vtype>::Realified> Realified;
// get double precision version
typedef iScalar<typename GridTypeMapper<vtype>::DoublePrecision> DoublePrecision;
enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 };
using Traits = GridTypeMapper<iScalar<vtype> >;
GridVector_CopyTraits;
// Scalar no action
// template<int Level> using tensor_reduce_level = typename
@ -173,7 +173,10 @@ class iScalar {
return stream;
};
// Raw pointer range over the stored data, viewed as scalar_type values:
// begin() reinterprets the address of _internal as scalar_type*, and end()
// is begin() + Traits::count.
// NOTE(review): this assumes _internal is laid out as Traits::count packed
// scalar_type values. Traits::count does not appear to include SIMD lanes,
// so for a SIMD vtype the range looks shorter than the storage - confirm
// the intended use (scalar objects only?).
strong_inline const scalar_type * begin() const { return reinterpret_cast<const scalar_type *>(&_internal); }
strong_inline scalar_type * begin() { return reinterpret_cast< scalar_type *>(&_internal); }
strong_inline const scalar_type * end() const { return begin() + Traits::count; }
strong_inline scalar_type * end() { return begin() + Traits::count; }
};
///////////////////////////////////////////////////////////
// Allows to turn scalar<scalar<scalar<double>>>> back to double.
@ -194,22 +197,9 @@ class iVector {
public:
vtype _internal[N];
typedef vtype element;
typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
typedef typename GridTypeMapper<vtype>::vector_type vector_type;
typedef typename GridTypeMapper<vtype>::vector_typeD vector_typeD;
typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object;
typedef iScalar<tensor_reduced_v> tensor_reduced;
typedef iVector<recurse_scalar_object, N> scalar_object;
using Traits = GridTypeMapper<iVector<vtype, N> >;
GridVector_CopyTraits;
// substitutes a real or complex version with same tensor structure
typedef iVector<typename GridTypeMapper<vtype>::Complexified, N> Complexified;
typedef iVector<typename GridTypeMapper<vtype>::Realified, N> Realified;
// get double precision version
typedef iVector<typename GridTypeMapper<vtype>::DoublePrecision, N> DoublePrecision;
template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type
* = nullptr>
strong_inline auto operator=(T arg) -> iVector<vtype, N> {
@ -218,7 +208,6 @@ class iVector {
return *this;
}
enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 };
iVector(const Zero &z) { *this = zero; };
iVector() = default;
/*
@ -303,6 +292,11 @@ class iVector {
// strong_inline vtype && operator ()(int i) {
// return _internal[i];
// }
// Raw pointer range over the stored data, viewed as scalar_type values:
// begin() reinterprets the _internal array as scalar_type*, and end() is
// begin() + Traits::count.
// NOTE(review): this assumes _internal is laid out as Traits::count packed
// scalar_type values. Traits::count does not appear to include SIMD lanes,
// so for a SIMD vtype the range looks shorter than the storage - confirm
// the intended use (scalar objects only?).
strong_inline const scalar_type * begin() const { return reinterpret_cast<const scalar_type *>(_internal); }
strong_inline scalar_type * begin() { return reinterpret_cast< scalar_type *>(_internal); }
strong_inline const scalar_type * end() const { return begin() + Traits::count; }
strong_inline scalar_type * end() { return begin() + Traits::count; }
};
template <class vtype, int N>
@ -310,25 +304,8 @@ class iMatrix {
public:
vtype _internal[N][N];
typedef vtype element;
typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
typedef typename GridTypeMapper<vtype>::vector_type vector_type;
typedef typename GridTypeMapper<vtype>::vector_typeD vector_typeD;
typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object;
// substitutes a real or complex version with same tensor structure
typedef iMatrix<typename GridTypeMapper<vtype>::Complexified, N> Complexified;
typedef iMatrix<typename GridTypeMapper<vtype>::Realified, N> Realified;
// get double precision version
typedef iMatrix<typename GridTypeMapper<vtype>::DoublePrecision, N> DoublePrecision;
// Tensor removal
typedef iScalar<tensor_reduced_v> tensor_reduced;
typedef iMatrix<recurse_scalar_object, N> scalar_object;
enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 };
using Traits = GridTypeMapper<iMatrix<vtype, N> >;
GridVector_CopyTraits;
iMatrix(const Zero &z) { *this = zero; };
iMatrix() = default;
@ -458,6 +435,11 @@ class iMatrix {
// strong_inline vtype && operator ()(int i,int j) {
// return _internal[i][j];
// }
// Raw pointer range over the stored data, viewed as scalar_type values:
// begin() reinterprets the first row of _internal as scalar_type*, and end()
// is begin() + Traits::count, so the range runs across all rows in storage order.
// NOTE(review): this assumes _internal is laid out as Traits::count packed
// scalar_type values. Traits::count does not appear to include SIMD lanes,
// so for a SIMD vtype the range looks shorter than the storage - confirm
// the intended use (scalar objects only?).
strong_inline const scalar_type * begin() const { return reinterpret_cast<const scalar_type *>(_internal[0]); }
strong_inline scalar_type * begin() { return reinterpret_cast< scalar_type *>(_internal[0]); }
strong_inline const scalar_type * end() const { return begin() + Traits::count; }
strong_inline scalar_type * end() { return begin() + Traits::count; }
};
template <class v>
@ -480,6 +462,3 @@ void vprefetch(const iMatrix<v, N> &vv) {
}
}
#endif

View File

@ -5,6 +5,7 @@
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Christopher Kelly <ckelly@phys.columbia.edu>
Author: Michael Marshall <michael.marshall@ed.ac.au>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
@ -26,6 +27,17 @@ Author: Christopher Kelly <ckelly@phys.columbia.edu>
namespace Grid {
// Forward declarations of the three Grid tensor class templates
template<class T>        class iScalar;
template<class T, int N> class iVector;
template<class T, int N> class iMatrix;

// isGridTensor<T>::value is true exactly when T is an instantiation of
// iScalar, iVector or iMatrix; notvalue is the logical negation of value.
template<typename T>
struct isGridTensor : public std::false_type {
  static constexpr bool notvalue = true;
};

template<class T>
struct isGridTensor<iScalar<T>> : public std::true_type {
  static constexpr bool notvalue = false;
};

template<class T, int N>
struct isGridTensor<iVector<T, N>> : public std::true_type {
  static constexpr bool notvalue = false;
};

template<class T, int N>
struct isGridTensor<iMatrix<T, N>> : public std::true_type {
  static constexpr bool notvalue = false;
};
//////////////////////////////////////////////////////////////////////////////////
// Want to recurse: GridTypeMapper<Matrix<vComplexD> >::scalar_type == ComplexD.
// Use of a helper class like this allows us to template specialise and "dress"
@ -40,25 +52,26 @@ namespace Grid {
// to study C++11's type_traits.h file. (std::enable_if<isGridTensorType<vtype> >)
//
//////////////////////////////////////////////////////////////////////////////////
template <class T> class GridTypeMapper {
public:
typedef typename T::scalar_type scalar_type;
typedef typename T::vector_type vector_type;
typedef typename T::vector_typeD vector_typeD;
typedef typename T::tensor_reduced tensor_reduced;
typedef typename T::scalar_object scalar_object;
typedef typename T::Complexified Complexified;
typedef typename T::Realified Realified;
typedef typename T::DoublePrecision DoublePrecision;
enum { TensorLevel = T::TensorLevel };
// This saves repeating common properties for supported Grid Scalar types
// TensorLevel How many nested grid tensors
// Rank Rank of the grid tensor
// count Total number of elements, i.e. product of dimensions
// Dimension(dim) Size of dimension dim
struct GridTypeMapper_Base {
  // A bare scalar sits below every tensor level ...
  static constexpr int         TensorLevel = 0;
  // ... carries no tensor indices ...
  static constexpr int         Rank        = 0;
  // ... and is exactly one element.
  static constexpr std::size_t count       = 1;
  // No dimension exists at this level, so every query answers 0.
  static constexpr int Dimension(int /*dim*/) { return 0; }
};
//////////////////////////////////////////////////////////////////////////////////
// Recursion stops with these template specialisations
//////////////////////////////////////////////////////////////////////////////////
template<> class GridTypeMapper<RealF> {
public:
template<typename T> struct GridTypeMapper {};
template<> struct GridTypeMapper<RealF> : public GridTypeMapper_Base {
typedef RealF scalar_type;
typedef RealF vector_type;
typedef RealD vector_typeD;
@ -67,10 +80,8 @@ namespace Grid {
typedef ComplexF Complexified;
typedef RealF Realified;
typedef RealD DoublePrecision;
enum { TensorLevel = 0 };
};
template<> class GridTypeMapper<RealD> {
public:
template<> struct GridTypeMapper<RealD> : public GridTypeMapper_Base {
typedef RealD scalar_type;
typedef RealD vector_type;
typedef RealD vector_typeD;
@ -79,10 +90,8 @@ namespace Grid {
typedef ComplexD Complexified;
typedef RealD Realified;
typedef RealD DoublePrecision;
enum { TensorLevel = 0 };
};
template<> class GridTypeMapper<ComplexF> {
public:
template<> struct GridTypeMapper<ComplexF> : public GridTypeMapper_Base {
typedef ComplexF scalar_type;
typedef ComplexF vector_type;
typedef ComplexD vector_typeD;
@ -91,10 +100,8 @@ namespace Grid {
typedef ComplexF Complexified;
typedef RealF Realified;
typedef ComplexD DoublePrecision;
enum { TensorLevel = 0 };
};
template<> class GridTypeMapper<ComplexD> {
public:
template<> struct GridTypeMapper<ComplexD> : public GridTypeMapper_Base {
typedef ComplexD scalar_type;
typedef ComplexD vector_type;
typedef ComplexD vector_typeD;
@ -103,10 +110,8 @@ namespace Grid {
typedef ComplexD Complexified;
typedef RealD Realified;
typedef ComplexD DoublePrecision;
enum { TensorLevel = 0 };
};
template<> class GridTypeMapper<Integer> {
public:
template<> struct GridTypeMapper<Integer> : public GridTypeMapper_Base {
typedef Integer scalar_type;
typedef Integer vector_type;
typedef Integer vector_typeD;
@ -115,11 +120,9 @@ namespace Grid {
typedef void Complexified;
typedef void Realified;
typedef void DoublePrecision;
enum { TensorLevel = 0 };
};
template<> class GridTypeMapper<vRealF> {
public:
template<> struct GridTypeMapper<vRealF> : public GridTypeMapper_Base {
typedef RealF scalar_type;
typedef vRealF vector_type;
typedef vRealD vector_typeD;
@ -128,10 +131,8 @@ namespace Grid {
typedef vComplexF Complexified;
typedef vRealF Realified;
typedef vRealD DoublePrecision;
enum { TensorLevel = 0 };
};
template<> class GridTypeMapper<vRealD> {
public:
template<> struct GridTypeMapper<vRealD> : public GridTypeMapper_Base {
typedef RealD scalar_type;
typedef vRealD vector_type;
typedef vRealD vector_typeD;
@ -140,10 +141,20 @@ namespace Grid {
typedef vComplexD Complexified;
typedef vRealD Realified;
typedef vRealD DoublePrecision;
enum { TensorLevel = 0 };
};
template<> class GridTypeMapper<vComplexH> {
public:
// Traits for the half-precision real SIMD type. Scalar-side types map to
// RealF and the double-precision side to vRealD.
template<> struct GridTypeMapper<vRealH> : public GridTypeMapper_Base {
  // Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types
  using scalar_type     = RealF;
  using vector_type     = vRealH;
  using vector_typeD    = vRealD;
  using tensor_reduced  = vRealH;
  using scalar_object   = RealF;
  using Complexified    = vComplexH;
  using Realified       = vRealH;
  using DoublePrecision = vRealD;
};
template<> struct GridTypeMapper<vComplexH> : public GridTypeMapper_Base {
// Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types
typedef ComplexF scalar_type;
typedef vComplexH vector_type;
typedef vComplexD vector_typeD;
@ -152,10 +163,8 @@ namespace Grid {
typedef vComplexH Complexified;
typedef vRealH Realified;
typedef vComplexD DoublePrecision;
enum { TensorLevel = 0 };
};
template<> class GridTypeMapper<vComplexF> {
public:
template<> struct GridTypeMapper<vComplexF> : public GridTypeMapper_Base {
typedef ComplexF scalar_type;
typedef vComplexF vector_type;
typedef vComplexD vector_typeD;
@ -164,10 +173,8 @@ namespace Grid {
typedef vComplexF Complexified;
typedef vRealF Realified;
typedef vComplexD DoublePrecision;
enum { TensorLevel = 0 };
};
template<> class GridTypeMapper<vComplexD> {
public:
template<> struct GridTypeMapper<vComplexD> : public GridTypeMapper_Base {
typedef ComplexD scalar_type;
typedef vComplexD vector_type;
typedef vComplexD vector_typeD;
@ -176,10 +183,8 @@ namespace Grid {
typedef vComplexD Complexified;
typedef vRealD Realified;
typedef vComplexD DoublePrecision;
enum { TensorLevel = 0 };
};
template<> class GridTypeMapper<vInteger> {
public:
template<> struct GridTypeMapper<vInteger> : public GridTypeMapper_Base {
typedef Integer scalar_type;
typedef vInteger vector_type;
typedef vInteger vector_typeD;
@ -188,57 +193,52 @@ namespace Grid {
typedef void Complexified;
typedef void Realified;
typedef void DoublePrecision;
enum { TensorLevel = 0 };
};
// First some of my own traits
template<typename T> struct isGridTensor {
static const bool value = true;
static const bool notvalue = false;
// Expands, inside a GridTypeMapper specialisation for a tensor wrapping T, to
// the members that are the same for every tensor kind: BaseTraits (the traits
// of the wrapped type T), the scalar/vector type aliases forwarded unchanged
// from BaseTraits, and TensorLevel set one higher than the wrapped type's.
// `T` must be in scope at the expansion site. The last line carries no
// semicolon, so the macro is used as `GridTypeMapper_RepeatedTypes;`.
#define GridTypeMapper_RepeatedTypes \
using BaseTraits = GridTypeMapper<T>; \
using scalar_type = typename BaseTraits::scalar_type; \
using vector_type = typename BaseTraits::vector_type; \
using vector_typeD = typename BaseTraits::vector_typeD; \
static constexpr int TensorLevel = BaseTraits::TensorLevel + 1
// Traits of iScalar<T>, built recursively from the traits of T (BaseTraits).
// Each derived type re-wraps the corresponding type of T in iScalar.
template<typename T> struct GridTypeMapper<iScalar<T>> {
GridTypeMapper_RepeatedTypes;
using tensor_reduced = iScalar<typename BaseTraits::tensor_reduced>;
using scalar_object = iScalar<typename BaseTraits::scalar_object>;
using Complexified = iScalar<typename BaseTraits::Complexified>;
using Realified = iScalar<typename BaseTraits::Realified>;
using DoublePrecision = iScalar<typename BaseTraits::DoublePrecision>;
// An iScalar level counts as one index of extent 1: Rank grows by one while
// the element count is unchanged.
static constexpr int Rank = BaseTraits::Rank + 1;
static constexpr std::size_t count = BaseTraits::count;
// Dimension 0 is this level (extent 1); higher indices are forwarded to T
// with the index shifted down by one.
static constexpr int Dimension(int dim) {
return ( dim == 0 ) ? 1 : BaseTraits::Dimension(dim - 1); }
};
template<> struct isGridTensor<int > {
static const bool value = false;
static const bool notvalue = true;
// Traits of iVector<T, N>, built recursively from the traits of T (BaseTraits).
// Derived types re-wrap the corresponding type of T in iVector<..., N>, except
// tensor_reduced, which wraps in iScalar (this level's index is dropped).
template<typename T, int N> struct GridTypeMapper<iVector<T, N>> {
GridTypeMapper_RepeatedTypes;
using tensor_reduced = iScalar<typename BaseTraits::tensor_reduced>;
using scalar_object = iVector<typename BaseTraits::scalar_object, N>;
using Complexified = iVector<typename BaseTraits::Complexified, N>;
using Realified = iVector<typename BaseTraits::Realified, N>;
using DoublePrecision = iVector<typename BaseTraits::DoublePrecision, N>;
// A vector level adds one index of extent N.
static constexpr int Rank = BaseTraits::Rank + 1;
static constexpr std::size_t count = BaseTraits::count * N;
// Dimension 0 is this level (extent N); higher indices are forwarded to T
// with the index shifted down by one.
static constexpr int Dimension(int dim) {
return ( dim == 0 ) ? N : BaseTraits::Dimension(dim - 1); }
};
template<> struct isGridTensor<RealD > {
static const bool value = false;
static const bool notvalue = true;
};
template<> struct isGridTensor<RealF > {
static const bool value = false;
static const bool notvalue = true;
};
template<> struct isGridTensor<ComplexD > {
static const bool value = false;
static const bool notvalue = true;
};
template<> struct isGridTensor<ComplexF > {
static const bool value = false;
static const bool notvalue = true;
};
template<> struct isGridTensor<Integer > {
static const bool value = false;
static const bool notvalue = true;
};
template<> struct isGridTensor<vRealD > {
static const bool value = false;
static const bool notvalue = true;
};
template<> struct isGridTensor<vRealF > {
static const bool value = false;
static const bool notvalue = true;
};
template<> struct isGridTensor<vComplexD > {
static const bool value = false;
static const bool notvalue = true;
};
template<> struct isGridTensor<vComplexF > {
static const bool value = false;
static const bool notvalue = true;
};
template<> struct isGridTensor<vInteger > {
static const bool value = false;
static const bool notvalue = true;
// Traits of iMatrix<T, N>, built recursively from the traits of T (BaseTraits).
// Derived types re-wrap the corresponding type of T in iMatrix<..., N>, except
// tensor_reduced, which wraps in iScalar (this level's indices are dropped).
template<typename T, int N> struct GridTypeMapper<iMatrix<T, N>> {
GridTypeMapper_RepeatedTypes;
using tensor_reduced = iScalar<typename BaseTraits::tensor_reduced>;
using scalar_object = iMatrix<typename BaseTraits::scalar_object, N>;
using Complexified = iMatrix<typename BaseTraits::Complexified, N>;
using Realified = iMatrix<typename BaseTraits::Realified, N>;
using DoublePrecision = iMatrix<typename BaseTraits::DoublePrecision, N>;
// A matrix level adds two indices, each of extent N.
static constexpr int Rank = BaseTraits::Rank + 2;
static constexpr std::size_t count = BaseTraits::count * N * N;
// Dimensions 0 and 1 are this level (extent N each); higher indices are
// forwarded to T with the index shifted down by two.
static constexpr int Dimension(int dim) {
return ( dim == 0 || dim == 1 ) ? N : BaseTraits::Dimension(dim - 2); }
};
// Match the index
@ -263,20 +263,13 @@ namespace Grid {
typedef T type;
};
//Query if a tensor or Lattice<Tensor> is SIMD vector or scalar
template<typename T>
class isSIMDvectorized{
template<typename U>
static typename std::enable_if< !std::is_same< typename GridTypeMapper<typename getVectorType<U>::type>::scalar_type,
typename GridTypeMapper<typename getVectorType<U>::type>::vector_type>::value, char>::type test(void *);
//Query whether a tensor or Lattice<Tensor> is SIMD vector or scalar
template<typename T, typename V=void> struct isSIMDvectorized : public std::false_type {};
template<typename U> struct isSIMDvectorized<U, typename std::enable_if< !std::is_same<
typename GridTypeMapper<typename getVectorType<U>::type>::scalar_type,
typename GridTypeMapper<typename getVectorType<U>::type>::vector_type>::value, void>::type>
: public std::true_type {};
template<typename U>
static double test(...);
public:
enum {value = sizeof(test<T>(0)) == sizeof(char) };
};
//Get the precision of a Lattice, tensor or scalar type in units of sizeof(float)
template<typename T>
class getPrecision{

View File

@ -47,6 +47,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#else
#define PARALLEL_FOR_LOOP
#define PARALLEL_FOR_LOOP_INTERN
#define PARALLEL_FOR_LOOP_REDUCE(op, var)
#define PARALLEL_NESTED_LOOP2
#define PARALLEL_NESTED_LOOP5
#define PARALLEL_REGION
@ -58,6 +59,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define parallel_for_internal PARALLEL_FOR_LOOP_INTERN for
#define parallel_for_nest2 PARALLEL_NESTED_LOOP2 for
#define parallel_for_nest5 PARALLEL_NESTED_LOOP5 for
#define parallel_critical PARALLEL_CRITICAL
namespace Grid {

View File

@ -289,6 +289,11 @@ void Grid_init(int *argc,char ***argv)
std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"<<std::endl;
std::cout << "GNU General Public License for more details."<<std::endl;
printHash();
#ifdef GRID_BUILD_REF
#define _GRID_BUILD_STR(x) #x
#define GRID_BUILD_STR(x) _GRID_BUILD_STR(x)
std::cout << "Build " << GRID_BUILD_STR(GRID_BUILD_REF) << std::endl;
#endif
std::cout << std::endl;
}

View File

@ -28,17 +28,46 @@
extern "C" {
#include <openssl/sha.h>
}
#ifdef USE_IPP
#include "ipp.h"
#endif
#pragma once
class GridChecksum
{
public:
static inline uint32_t crc32(void *data,size_t bytes)
static inline uint32_t crc32(const void *data, size_t bytes)
{
return ::crc32(0L,(unsigned char *)data,bytes);
}
static inline std::vector<unsigned char> sha256(void *data,size_t bytes)
#ifdef USE_IPP
// Computes a CRC32C over `bytes` bytes starting at `data` using the IPP
// library: the accumulator is seeded with all bits set, updated by
// ippsCRC32C_8u, byte-swapped in place by ippsSwapBytes_32u_I, and the
// bitwise complement of the result is returned.
// NOTE(review): the byte swap presumably matches the byte order of some
// reference CRC32C output - confirm against the consumers of this value.
// The return codes of both IPP calls are ignored.
static inline uint32_t crc32c(const void* data, size_t bytes)
{
// local accumulator; it shares its name with the enclosing function
uint32_t crc32c = ~(uint32_t)0;
ippsCRC32C_8u(reinterpret_cast<const unsigned char *>(data), bytes, &crc32c);
ippsSwapBytes_32u_I(&crc32c, 1);
return ~crc32c;
}
#endif
// Renders a digest as lowercase hexadecimal text: every element of `hash` is
// converted to unsigned int and appended in base 16, in order.
// Elements are NOT zero-padded to two digits (0x0a is written as "a"), so the
// length of the result depends on the values.
template <typename T>
static inline std::string sha256_string(const std::vector<T> &hash)
{
  std::stringstream hexDigits;
  hexDigits << std::hex;   // sticky: applies to every value inserted below
  for (const T &value : hash)
  {
    hexDigits << static_cast<unsigned int>(value);
  }
  return hexDigits.str();
}
static inline std::vector<unsigned char> sha256(const void *data,size_t bytes)
{
std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH);
SHA256_CTX sha256;

6
HMC/Makefile.am Normal file
View File

@ -0,0 +1,6 @@
SUBDIRS = .
include Make.inc

198
HMC/Mobius2p1f.cc Normal file
View File

@ -0,0 +1,198 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./HMC/Mobius2p1f.cc
Copyright (C) 2015-2016
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
int main(int argc, char **argv) {
using namespace Grid;
using namespace Grid::QCD;
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
// here make a routine to print all the relevant information on the run
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
// Typedefs to simplify notation
typedef WilsonImplR FermionImplPolicy;
typedef MobiusFermionR FermionAction;
typedef typename FermionAction::FermionField FermionField;
typedef Grid::XmlReader Serialiser;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
IntegratorParameters MD;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// MD.name = std::string("Leap Frog");
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
// MD.name = std::string("Force Gradient");
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
MD.name = std::string("MinimumNorm2");
MD.MDsteps = 20;
MD.trajL = 1.0;
HMCparameters HMCparams;
HMCparams.StartTrajectory = 0;
HMCparams.Trajectories = 200;
HMCparams.NoMetropolisUntil= 20;
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
HMCparams.StartingType =std::string("ColdStart");
HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams);
// Grid from the command line arguments --grid and --mpi
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_EODWF_lat";
CPparams.rng_prefix = "ckpoint_EODWF_rng";
CPparams.saveInterval = 10;
CPparams.format = "IEEE64BIG";
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
RNGModuleParameters RNGpar;
RNGpar.serial_seeds = "1 2 3 4 5";
RNGpar.parallel_seeds = "6 7 8 9 10";
TheHMC.Resources.SetRNGSeeds(RNGpar);
// Construct observables
// here there is too much indirection
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
TheHMC.Resources.AddObservable<PlaqObs>();
//////////////////////////////////////////////
const int Ls = 16;
Real beta = 2.13;
Real light_mass = 0.01;
Real strange_mass = 0.04;
Real pv_mass = 1.0;
RealD M5 = 1.8;
RealD b = 1.0; // Scale factor two
RealD c = 0.0;
OneFlavourRationalParams OFRp;
OFRp.lo = 1.0e-2;
OFRp.hi = 64;
OFRp.MaxIter = 10000;
OFRp.tolerance= 1.0e-10;
OFRp.degree = 14;
OFRp.precision= 40;
std::vector<Real> hasenbusch({ 0.1 });
auto GridPtr = TheHMC.Resources.GetCartesian();
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
IwasakiGaugeActionR GaugeAction(beta);
// temporarily need a gauge field
LatticeGaugeField U(GridPtr);
// These lines are unnecessary if BC are all periodic
std::vector<Complex> boundary = {1,1,1,-1};
FermionAction::ImplParams Params(boundary);
double StoppingCondition = 1e-10;
double MaxCGIterations = 30000;
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
////////////////////////////////////
// Collect actions
////////////////////////////////////
ActionLevel<HMCWrapper::Field> Level1(1);
ActionLevel<HMCWrapper::Field> Level2(4);
////////////////////////////////////
// Strange action
////////////////////////////////////
// FermionAction StrangeOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_mass,M5,b,c, Params);
// DomainWallEOFAFermionR Strange_Op_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5);
// DomainWallEOFAFermionR Strange_Op_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5);
// ExactOneFlavourRatioPseudoFermionAction EOFA(Strange_Op_L,Strange_Op_R,CG,ofp, false);
FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params);
// OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermion(StrangePauliVillarsOp,StrangeOp,OFRp);
OneFlavourRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermion(StrangePauliVillarsOp,StrangeOp,OFRp);
// TwoFlavourRationalTesterPseudoFermionAction<FermionImplPolicy> StrangePseudoFermion1F(StrangeOp,OFRp);
// TwoFlavourPseudoFermionAction<FermionImplPolicy> StrangePseudoFermion2F(StrangeOp,CG,CG);
// Level1.push_back(&StrangePseudoFermion2F);
// Level1.push_back(&StrangePseudoFermion);
////////////////////////////////////
// up down action
////////////////////////////////////
std::vector<Real> light_den;
std::vector<Real> light_num;
int n_hasenbusch = hasenbusch.size();
light_den.push_back(light_mass);
for(int h=0;h<n_hasenbusch;h++){
light_den.push_back(hasenbusch[h]);
light_num.push_back(hasenbusch[h]);
}
light_num.push_back(pv_mass);
std::vector<FermionAction *> Numerators;
std::vector<FermionAction *> Denominators;
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
for(int h=0;h<n_hasenbusch+1;h++){
std::cout << GridLogMessage << " 2f quotient Action "<< light_num[h] << " / " << light_den[h]<< std::endl;
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],CG,CG));
}
for(int h=0;h<n_hasenbusch+1;h++){
Level1.push_back(Quotients[h]);
}
/////////////////////////////////////////////////////////////
// Gauge action
/////////////////////////////////////////////////////////////
Level2.push_back(&GaugeAction);
TheHMC.TheAction.push_back(Level1);
TheHMC.TheAction.push_back(Level2);
std::cout << GridLogMessage << " Action complete "<< std::endl;
/////////////////////////////////////////////////////////////
// HMC parameters are serialisable
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
TheHMC.Run(); // no smearing
Grid_finalize();
} // main

452
HMC/Mobius2p1fEOFA.cc Normal file
View File

@ -0,0 +1,452 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./HMC/Mobius2p1fEOFA.cc
Copyright (C) 2015-2016
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Guido Cossu
Author: David Murphy
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
#define MIXED_PRECISION
#endif
namespace Grid{
namespace QCD{
/*
 * Need a plan for gauge field update for mixed precision in HMC (2x speed up)
 * -- Store the single prec action operator.
 * -- Clone the gauge field from the operator function argument.
 * -- Build the mixed precision operator dynamically from the passed operator and single prec clone.
 */

/**
 * OperatorFunction wrapper that performs a mixed precision conjugate gradient
 * solve on behalf of a double precision pseudofermion action.
 *
 * The wrapper holds references to a double precision fermion operator, its
 * single precision twin, and the Schur operators built on each of them. Every
 * call to operator() first copies the (doubled) gauge links of the double
 * precision operator into the single precision operator, so that the single
 * precision operator tracks the gauge field being evolved, and then runs a
 * MixedPrecisionConjugateGradient using the two stored Schur operators.
 *
 * Template parameters:
 *   FermionOperatorD / FermionOperatorF - double / single precision fermion operator types
 *   SchurOperatorD   / SchurOperatorF   - Schur operator types built on those fermion operators
 *
 * Only references are stored: all four operators must outlive this object.
 */
template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class SchurOperatorF>
class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
public:
  typedef typename FermionOperatorD::FermionField FieldD;
  typedef typename FermionOperatorF::FermionField FieldF;

  RealD   Tolerance;
  RealD   InnerTolerance;      //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
  Integer MaxInnerIterations;
  Integer MaxOuterIterations;
  GridBase* SinglePrecGrid4;   //Grid for single-precision gauge fields
  GridBase* SinglePrecGrid5;   //Grid for single-precision fermion fields
  RealD OuterLoopNormMult;     //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance

  FermionOperatorF &FermOpF;
  FermionOperatorD &FermOpD;
  SchurOperatorF   &LinOpF;
  SchurOperatorD   &LinOpD;

  // Statistics of the most recent solve (zero until operator() has been called)
  Integer TotalInnerIterations;     //Number of inner CG iterations
  Integer TotalOuterIterations;     //Number of restarts
  Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step

  /**
   * tol        - target residual; also the initial value of InnerTolerance
   * maxinnerit - iteration cap handed to the mixed precision solver as its inner limit
   * maxouterit - iteration cap handed to the mixed precision solver as its outer limit
   * _sp_grid4  - grid on which the temporary single precision gauge link is allocated
   * _sp_grid5  - single precision grid handed to the mixed precision solver
   * _FermOpF / _FermOpD - single / double precision fermion operators
   * _LinOpF  / _LinOpD  - Schur operators wrapping _FermOpF / _FermOpD
   */
  MixedPrecisionConjugateGradientOperatorFunction(RealD tol,
                                                  Integer maxinnerit,
                                                  Integer maxouterit,
                                                  GridBase* _sp_grid4,
                                                  GridBase* _sp_grid5,
                                                  FermionOperatorF &_FermOpF,
                                                  FermionOperatorD &_FermOpD,
                                                  SchurOperatorF &_LinOpF,
                                                  SchurOperatorD &_LinOpD):
    // Initialisers are listed in declaration order, which is the order in which
    // the compiler actually runs them.
    Tolerance(tol),
    InnerTolerance(tol),
    MaxInnerIterations(maxinnerit),
    MaxOuterIterations(maxouterit),
    SinglePrecGrid4(_sp_grid4),
    SinglePrecGrid5(_sp_grid5),
    OuterLoopNormMult(100.),
    FermOpF(_FermOpF),
    FermOpD(_FermOpD),
    LinOpF(_LinOpF),
    LinOpD(_LinOpD),
    TotalInnerIterations(0),
    TotalOuterIterations(0),
    TotalFinalStepIterations(0)
  {
    /* Debugging instances of objects; references are stored
    std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " <<std::hex<< &LinOpF<<std::dec <<std::endl;
    std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpD " <<std::hex<< &LinOpD<<std::dec <<std::endl;
    std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpF " <<std::hex<< &FermOpF<<std::dec <<std::endl;
    std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpD " <<std::hex<< &FermOpD<<std::dec <<std::endl;
    */
  }

  /**
   * Solve for psi given src.
   *
   * LinOpU must be a SchurOperatorD wrapping the very same fermion matrix
   * object as the stored LinOpD (this is asserted). The solve itself uses the
   * stored LinOpF / LinOpD pair, not LinOpU.
   */
  void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {

    std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl;

    SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);

    // std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <<std::hex<< &(SchurOpU->_Mat)<<std::dec <<std::endl;
    // std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpD " <<std::hex<< &(LinOpD._Mat) <<std::dec <<std::endl;

    // Assumption made in code to extract gauge field
    // We could avoid storing LinOpD reference altogether ?
    assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));

    ////////////////////////////////////////////////////////////////////////////////////
    // Must snarf a single precision copy of the gauge field in Linop_d argument
    ////////////////////////////////////////////////////////////////////////////////////
    typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF;
    typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD;

    GridBase * GridPtrF = SinglePrecGrid4;
    GridBase * GridPtrD = FermOpD.Umu._grid;

    // std::cout << " Dim gauge field "<<GridPtrF->Nd()<<std::endl; // 4d
    // std::cout << " Dim gauge field "<<GridPtrD->Nd()<<std::endl; // 4d

    ////////////////////////////////////////////////////////////////////////////////////
    // Moving this to a Clone method of fermion operator would allow to duplicate the
    // physics parameters and decrease gauge field copies
    ////////////////////////////////////////////////////////////////////////////////////
    // Only one link field per precision is needed as scratch space.
    GaugeLinkFieldF Umu_f(GridPtrF);
    GaugeLinkFieldD Umu_d(GridPtrD);
    // The operator's Umu is indexed over 2*Nd link directions here.
    for(int mu=0;mu<Nd*2;mu++){
      Umu_d = PeekIndex<LorentzIndex>(FermOpD.Umu, mu);
      precisionChange(Umu_f,Umu_d);
      PokeIndex<LorentzIndex>(FermOpF.Umu, Umu_f, mu);
    }
    // Refresh the checkerboarded copies held by the single precision operator
    pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
    pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);

    ////////////////////////////////////////////////////////////////////////////////////
    // Could test to make sure that LinOpF and LinOpD agree to single prec?
    ////////////////////////////////////////////////////////////////////////////////////
    /*
    GridBase *Fgrid = psi._grid;
    FieldD tmp2(Fgrid);
    FieldD tmp1(Fgrid);
    LinOpU.Op(src,tmp1);
    LinOpD.Op(src,tmp2);
    std::cout << " Double gauge field "<< norm2(FermOpD.Umu)<<std::endl;
    std::cout << " Single gauge field "<< norm2(FermOpF.Umu)<<std::endl;
    std::cout << " Test of operators "<<norm2(tmp1)<<std::endl;
    std::cout << " Test of operators "<<norm2(tmp2)<<std::endl;
    tmp1=tmp1-tmp2;
    std::cout << " Test of operators diff "<<norm2(tmp1)<<std::endl;
    */

    ////////////////////////////////////////////////////////////////////////////////////
    // Make a mixed precision conjugate gradient
    ////////////////////////////////////////////////////////////////////////////////////
    MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
    // Forward the tunables so that changing them on this wrapper has an effect.
    // With the constructor defaults these equal the solver's own defaults.
    MPCG.InnerTolerance    = InnerTolerance;
    MPCG.OuterLoopNormMult = OuterLoopNormMult;

    std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
    MPCG(src,psi);

    // Publish the iteration counts of this solve
    TotalInnerIterations     = MPCG.TotalInnerIterations;
    TotalOuterIterations     = MPCG.TotalOuterIterations;
    TotalFinalStepIterations = MPCG.TotalFinalStepIterations;
  }
};
}}
// Driver for a 2+1 flavour HMC run with Mobius fermions and an Iwasaki gauge action.
// The strange quark is handled by an ExactOneFlavourRatioPseudoFermionAction (EOFA),
// the light quarks by a chain of TwoFlavourEvenOddRatioPseudoFermionAction quotients
// split at the intermediate masses listed in `hasenbusch`.
// When MIXED_PRECISION is defined the force/action solves go through the
// MixedPrecisionConjugateGradientOperatorFunction wrapper defined earlier in this file;
// otherwise plain double precision ConjugateGradient solvers are used.
// All run parameters are hard-coded below.
int main(int argc, char **argv) {
using namespace Grid;
using namespace Grid::QCD;
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
// here make a routine to print all the relevant information on the run
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
// Typedefs to simplify notation
// The R-suffixed types follow the build's default precision, F is single precision.
typedef WilsonImplR FermionImplPolicy;
typedef MobiusFermionR FermionAction;
typedef MobiusFermionF FermionActionF;
typedef MobiusEOFAFermionR FermionEOFAAction;
typedef MobiusEOFAFermionF FermionEOFAActionF;
typedef typename FermionAction::FermionField FermionField;
typedef typename FermionActionF::FermionField FermionFieldF;
typedef Grid::XmlReader Serialiser;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Molecular dynamics integrator selection: exactly one HMCWrapper typedef is active.
IntegratorParameters MD;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// MD.name = std::string("Leap Frog");
typedef GenericHMCRunner<ForceGradient> HMCWrapper;
MD.name = std::string("Force Gradient");
// typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
// MD.name = std::string("MinimumNorm2");
MD.MDsteps = 6;
MD.trajL = 1.0;
// Trajectory bookkeeping: resumes from a checkpoint at trajectory 590.
HMCparameters HMCparams;
HMCparams.StartTrajectory = 590;
HMCparams.Trajectories = 1000;
HMCparams.NoMetropolisUntil= 0;
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
// HMCparams.StartingType =std::string("ColdStart");
HMCparams.StartingType =std::string("CheckpointStart");
HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams);
// Grid from the command line arguments --grid and --mpi
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
// Checkpoint file name prefixes, save interval and on-disk format
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_EODWF_lat";
CPparams.rng_prefix = "ckpoint_EODWF_rng";
CPparams.saveInterval = 10;
CPparams.format = "IEEE64BIG";
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
// Fixed RNG seeds
RNGModuleParameters RNGpar;
RNGpar.serial_seeds = "1 2 3 4 5";
RNGpar.parallel_seeds = "6 7 8 9 10";
TheHMC.Resources.SetRNGSeeds(RNGpar);
// Construct observables
// here there is too much indirection
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
TheHMC.Resources.AddObservable<PlaqObs>();
//////////////////////////////////////////////
// Physics parameters
const int Ls = 16;
Real beta = 2.13;
Real light_mass = 0.01;
Real strange_mass = 0.04;
Real pv_mass = 1.0;
RealD M5 = 1.8;
RealD b = 1.0;
RealD c = 0.0;
// Intermediate masses inserted between light_mass and pv_mass in the light quark quotients
std::vector<Real> hasenbusch({ 0.1, 0.3, 0.6 });
// Default precision grids: 4d grids come from the HMC resource manager, 5d grids are built on top with extent Ls
auto GridPtr = TheHMC.Resources.GetCartesian();
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
// Single precision grids: same default lattice and MPI layout, SIMD layout taken from vComplexF
std::vector<int> latt = GridDefaultLatt();
std::vector<int> mpi = GridDefaultMpi();
std::vector<int> simdF = GridDefaultSimd(Nd,vComplexF::Nsimd());
// NOTE(review): simdD is not used anywhere else in this function
std::vector<int> simdD = GridDefaultSimd(Nd,vComplexD::Nsimd());
auto GridPtrF = SpaceTimeGrid::makeFourDimGrid(latt,simdF,mpi);
auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF);
auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF);
auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF);
IwasakiGaugeActionR GaugeAction(beta);
// temporarily need a gauge field
// U and UF are only used here as constructor arguments for the fermion operators
LatticeGaugeField U(GridPtr);
LatticeGaugeFieldF UF(GridPtrF);
// These lines are unnecessary if BC are all periodic
std::vector<Complex> boundary = {1,1,1,-1};
FermionAction::ImplParams Params(boundary);
FermionActionF::ImplParams ParamsF(boundary);
// Separate solver tolerances for the action and for the derivative (force) solves
double ActionStoppingCondition = 1e-10;
double DerivativeStoppingCondition = 1e-6;
// NOTE(review): declared double although it is passed where an iteration count is expected
double MaxCGIterations = 30000;
////////////////////////////////////
// Collect actions
////////////////////////////////////
// Fermion actions go on Level1, the gauge action on Level2.
// presumably the integer argument is the step multiplier of the level - TODO confirm
ActionLevel<HMCWrapper::Field> Level1(1);
ActionLevel<HMCWrapper::Field> Level2(8);
////////////////////////////////////
// Strange action
////////////////////////////////////
typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
typedef SchurDiagMooeeOperator<FermionEOFAActionF,FermionFieldF> LinearOperatorEOFAF;
typedef SchurDiagMooeeOperator<FermionEOFAAction ,FermionField > LinearOperatorEOFAD;
// Mixed precision solver wrappers for the Mobius and the Mobius EOFA operators
typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusFermionD,MobiusFermionF,LinearOperatorD,LinearOperatorF> MxPCG;
typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusEOFAFermionD,MobiusEOFAFermionF,LinearOperatorEOFAD,LinearOperatorEOFAF> MxPCG_EOFA;
// DJM: setup for EOFA ratio (Mobius)
OneFlavourRationalParams OFRp;
OFRp.lo = 0.1;
OFRp.hi = 25.0;
OFRp.MaxIter = 10000;
OFRp.tolerance= 1.0e-9;
OFRp.degree = 14;
OFRp.precision= 50;
// Left and right EOFA operators, each in default and single precision with identical parameters.
// L uses first mass strange_mass, shift 0.0 and sign -1; R uses first mass pv_mass, shift -1.0 and sign +1.
// NOTE(review): argument meanings (mq1, mq2, mq3, shift, pm) inferred from the commented-out
// DomainWallEOFAFermionR example in the sibling drivers - confirm against the constructor.
MobiusEOFAFermionR Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
MobiusEOFAFermionF Strange_Op_LF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
MobiusEOFAFermionR Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c);
MobiusEOFAFermionF Strange_Op_RF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c);
// Plain double precision solvers; always constructed, also used in the mixed precision build
ConjugateGradient<FermionField> ActionCG(ActionStoppingCondition,MaxCGIterations);
ConjugateGradient<FermionField> DerivativeCG(DerivativeStoppingCondition,MaxCGIterations);
#ifdef MIXED_PRECISION
// Inner iteration cap for the mixed precision solvers; only defined in this build variant
const int MX_inner = 1000;
// Mixed precision EOFA
LinearOperatorEOFAD Strange_LinOp_L (Strange_Op_L);
LinearOperatorEOFAD Strange_LinOp_R (Strange_Op_R);
LinearOperatorEOFAF Strange_LinOp_LF(Strange_Op_LF);
LinearOperatorEOFAF Strange_LinOp_RF(Strange_Op_RF);
// Four wrappers: {action, derivative} tolerance x {left, right} operator.
// Each receives the 4d single precision grid and the 5d red-black single precision grid.
MxPCG_EOFA ActionCGL(ActionStoppingCondition,
MX_inner,
MaxCGIterations,
GridPtrF,
FrbGridF,
Strange_Op_LF,Strange_Op_L,
Strange_LinOp_LF,Strange_LinOp_L);
MxPCG_EOFA DerivativeCGL(DerivativeStoppingCondition,
MX_inner,
MaxCGIterations,
GridPtrF,
FrbGridF,
Strange_Op_LF,Strange_Op_L,
Strange_LinOp_LF,Strange_LinOp_L);
MxPCG_EOFA ActionCGR(ActionStoppingCondition,
MX_inner,
MaxCGIterations,
GridPtrF,
FrbGridF,
Strange_Op_RF,Strange_Op_R,
Strange_LinOp_RF,Strange_LinOp_R);
MxPCG_EOFA DerivativeCGR(DerivativeStoppingCondition,
MX_inner,
MaxCGIterations,
GridPtrF,
FrbGridF,
Strange_Op_RF,Strange_Op_R,
Strange_LinOp_RF,Strange_LinOp_R);
// The first solver argument stays the plain double precision ActionCG
// (see the "Heatbath not mixed yet" remark further down)
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy>
EOFA(Strange_Op_L, Strange_Op_R,
ActionCG,
ActionCGL, ActionCGR,
DerivativeCGL, DerivativeCGR,
OFRp, true);
#else
// Double precision only: the same two solvers serve both the left and right operators
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy>
EOFA(Strange_Op_L, Strange_Op_R,
ActionCG,
ActionCG, ActionCG,
DerivativeCG, DerivativeCG,
OFRp, true);
#endif
Level1.push_back(&EOFA);
////////////////////////////////////
// up down action
////////////////////////////////////
// Build the mass chain. After this block
//   light_den = { light_mass, hasenbusch[0], ..., hasenbusch[n-1] }
//   light_num = { hasenbusch[0], ..., hasenbusch[n-1], pv_mass }
// so quotient h pairs light_num[h] with light_den[h] and there are n_hasenbusch+1 quotients.
std::vector<Real> light_den;
std::vector<Real> light_num;
int n_hasenbusch = hasenbusch.size();
light_den.push_back(light_mass);
for(int h=0;h<n_hasenbusch;h++){
light_den.push_back(hasenbusch[h]);
light_num.push_back(hasenbusch[h]);
}
light_num.push_back(pv_mass);
//////////////////////////////////////////////////////////////
// Forced to replicate the MxPCG and DenominatorsF etc.. because
// there is no convenient way to "Clone" physics params from double op
// into single op for any operator pair.
// Same issue prevents using MxPCG in the Heatbath step
//////////////////////////////////////////////////////////////
// Everything pushed into these vectors is allocated with new and never deleted in this
// function; the objects must stay alive while TheHMC.Run() uses them.
std::vector<FermionAction *> Numerators;
std::vector<FermionAction *> Denominators;
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
std::vector<MxPCG *> ActionMPCG;
std::vector<MxPCG *> MPCG;
std::vector<FermionActionF *> DenominatorsF;
std::vector<LinearOperatorD *> LinOpD;
std::vector<LinearOperatorF *> LinOpF;
for(int h=0;h<n_hasenbusch+1;h++){
std::cout << GridLogMessage << " 2f quotient Action "<< light_num[h] << " / " << light_den[h]<< std::endl;
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
#ifdef MIXED_PRECISION
////////////////////////////////////////////////////////////////////////////
// Mixed precision CG for 2f force
////////////////////////////////////////////////////////////////////////////
// Only the denominator gets a single precision twin and mixed precision solvers
DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsF));
LinOpD.push_back(new LinearOperatorD(*Denominators[h]));
LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h]));
// MPCG[h] uses the derivative tolerance, ActionMPCG[h] the action tolerance
MPCG.push_back(new MxPCG(DerivativeStoppingCondition,
MX_inner,
MaxCGIterations,
GridPtrF,
FrbGridF,
*DenominatorsF[h],*Denominators[h],
*LinOpF[h], *LinOpD[h]) );
ActionMPCG.push_back(new MxPCG(ActionStoppingCondition,
MX_inner,
MaxCGIterations,
GridPtrF,
FrbGridF,
*DenominatorsF[h],*Denominators[h],
*LinOpF[h], *LinOpD[h]) );
// Heatbath not mixed yet. As inverts numerators not so important as raised mass.
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],ActionCG));
#else
////////////////////////////////////////////////////////////////////////////
// Standard CG for 2f force
////////////////////////////////////////////////////////////////////////////
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],DerivativeCG,ActionCG));
#endif
}
// Register every light quark quotient on the fermion level
for(int h=0;h<n_hasenbusch+1;h++){
Level1.push_back(Quotients[h]);
}
/////////////////////////////////////////////////////////////
// Gauge action
/////////////////////////////////////////////////////////////
Level2.push_back(&GaugeAction);
TheHMC.TheAction.push_back(Level1);
TheHMC.TheAction.push_back(Level2);
std::cout << GridLogMessage << " Action complete "<< std::endl;
/////////////////////////////////////////////////////////////
// HMC parameters are serialisable
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
TheHMC.Run(); // no smearing
Grid_finalize();
} // main

198
HMC/Mobius2p1fRHMC.cc Normal file
View File

@ -0,0 +1,198 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./HMC/Mobius2p1fRHMC.cc
Copyright (C) 2015-2016
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
// Driver for a 2+1 flavour HMC run with Mobius fermions and an Iwasaki gauge action.
// The strange quark is handled by a OneFlavourEvenOddRatioRationalPseudoFermionAction (RHMC),
// the light quarks by a chain of TwoFlavourEvenOddRatioPseudoFermionAction quotients
// split at the intermediate masses listed in `hasenbusch`.
// A single double precision ConjugateGradient is used for every two-flavour solve.
// All run parameters are hard-coded below.
int main(int argc, char **argv) {
using namespace Grid;
using namespace Grid::QCD;
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
// here make a routine to print all the relevant information on the run
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
// Typedefs to simplify notation
typedef WilsonImplR FermionImplPolicy;
typedef MobiusFermionR FermionAction;
typedef typename FermionAction::FermionField FermionField;
typedef Grid::XmlReader Serialiser;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Molecular dynamics integrator selection: exactly one HMCWrapper typedef is active.
IntegratorParameters MD;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// MD.name = std::string("Leap Frog");
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
// MD.name = std::string("Force Gradient");
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
MD.name = std::string("MinimumNorm2");
MD.MDsteps = 20;
MD.trajL = 1.0;
// Trajectory bookkeeping: resumes from a checkpoint at trajectory 30.
HMCparameters HMCparams;
HMCparams.StartTrajectory = 30;
HMCparams.Trajectories = 200;
HMCparams.NoMetropolisUntil= 0;
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
// HMCparams.StartingType =std::string("ColdStart");
HMCparams.StartingType =std::string("CheckpointStart");
HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams);
// Grid from the command line arguments --grid and --mpi
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
// Checkpoint file name prefixes, save interval and on-disk format
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_EODWF_lat";
CPparams.rng_prefix = "ckpoint_EODWF_rng";
CPparams.saveInterval = 10;
CPparams.format = "IEEE64BIG";
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
// Fixed RNG seeds
RNGModuleParameters RNGpar;
RNGpar.serial_seeds = "1 2 3 4 5";
RNGpar.parallel_seeds = "6 7 8 9 10";
TheHMC.Resources.SetRNGSeeds(RNGpar);
// Construct observables
// here there is too much indirection
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
TheHMC.Resources.AddObservable<PlaqObs>();
//////////////////////////////////////////////
// Physics parameters
const int Ls = 16;
Real beta = 2.13;
Real light_mass = 0.01;
Real strange_mass = 0.04;
Real pv_mass = 1.0;
RealD M5 = 1.8;
RealD b = 1.0;
RealD c = 0.0;
// FIXME:
// Same in MC and MD
// Need to mix precision too
// Rational approximation parameters for the strange quark action
OneFlavourRationalParams OFRp;
OFRp.lo = 4.0e-3;
OFRp.hi = 30.0;
OFRp.MaxIter = 10000;
OFRp.tolerance= 1.0e-10;
OFRp.degree = 16;
OFRp.precision= 50;
// Intermediate masses inserted between light_mass and pv_mass in the light quark quotients
std::vector<Real> hasenbusch({ 0.1 });
// 4d grids come from the HMC resource manager, 5d grids are built on top with extent Ls
auto GridPtr = TheHMC.Resources.GetCartesian();
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
IwasakiGaugeActionR GaugeAction(beta);
// temporarily need a gauge field
// U is only used here as a constructor argument for the fermion operators
LatticeGaugeField U(GridPtr);
// These lines are unnecessary if BC are all periodic
std::vector<Complex> boundary = {1,1,1,-1};
FermionAction::ImplParams Params(boundary);
double StoppingCondition = 1e-10;
// NOTE(review): declared double although it is passed where an iteration count is expected
double MaxCGIterations = 30000;
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
////////////////////////////////////
// Collect actions
////////////////////////////////////
// Fermion actions go on Level1, the gauge action on Level2.
// presumably the integer argument is the step multiplier of the level - TODO confirm
ActionLevel<HMCWrapper::Field> Level1(1);
ActionLevel<HMCWrapper::Field> Level2(4);
////////////////////////////////////
// Strange action
////////////////////////////////////
// FermionAction StrangeOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_mass,M5,b,c, Params);
// DomainWallEOFAFermionR Strange_Op_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5);
// DomainWallEOFAFermionR Strange_Op_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5);
// ExactOneFlavourRatioPseudoFermionAction EOFA(Strange_Op_L,Strange_Op_R,CG,ofp, false);
// Strange operator at strange_mass paired with a Pauli-Villars operator at pv_mass
FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermion(StrangePauliVillarsOp,StrangeOp,OFRp);
Level1.push_back(&StrangePseudoFermion);
////////////////////////////////////
// up down action
////////////////////////////////////
// Build the mass chain. After this block
//   light_den = { light_mass, hasenbusch[0], ..., hasenbusch[n-1] }
//   light_num = { hasenbusch[0], ..., hasenbusch[n-1], pv_mass }
// so quotient h pairs light_num[h] with light_den[h] and there are n_hasenbusch+1 quotients.
std::vector<Real> light_den;
std::vector<Real> light_num;
int n_hasenbusch = hasenbusch.size();
light_den.push_back(light_mass);
for(int h=0;h<n_hasenbusch;h++){
light_den.push_back(hasenbusch[h]);
light_num.push_back(hasenbusch[h]);
}
light_num.push_back(pv_mass);
// Everything pushed into these vectors is allocated with new and never deleted in this
// function; the objects must stay alive while TheHMC.Run() uses them.
std::vector<FermionAction *> Numerators;
std::vector<FermionAction *> Denominators;
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
for(int h=0;h<n_hasenbusch+1;h++){
std::cout << GridLogMessage << " 2f quotient Action "<< light_num[h] << " / " << light_den[h]<< std::endl;
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
// The same CG object is passed for both solver arguments
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],CG,CG));
}
// Register every light quark quotient on the fermion level
for(int h=0;h<n_hasenbusch+1;h++){
Level1.push_back(Quotients[h]);
}
/////////////////////////////////////////////////////////////
// Gauge action
/////////////////////////////////////////////////////////////
Level2.push_back(&GaugeAction);
TheHMC.TheAction.push_back(Level1);
TheHMC.TheAction.push_back(Level2);
std::cout << GridLogMessage << " Action complete "<< std::endl;
/////////////////////////////////////////////////////////////
// HMC parameters are serialisable
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
TheHMC.Run(); // no smearing
Grid_finalize();
} // main

109
HMC/README Normal file
View File

@ -0,0 +1,109 @@
********************************************************************
TODO:
********************************************************************
i) Got mixed precision in 2f and EOFA force and action solves.
But need mixed precision in the heatbath solve. Best for Fermop to have a "clone" method, to
reduce the number of solver and action objects. Needed ideally for the EOFA heatbath.
15% perhaps
Combine with 2x trajectory length?
ii) Rational on EOFA HB -- relax order
-- Test the approximation as per David's email
Resume / roll.sh
----------------------------------------------------------------
- 16^3 Currently 10 traj per hour
- EOFA use a different derivative solver from action solver
- EOFA fix David's hack to the SchurRedBlack guessing
*** Reduce precision/tolerance in EOFA with second CG param. (10% speed up)
*** Force gradient - reduced precision solve for the gradient (4/3x speedup)
*** Need a plan for gauge field update for mixed precision in HMC (2x speed up)
-- Store the single prec action operator.
-- Clone the gauge field from the operator function argument.
-- Build the mixed precision operator dynamically from the passed operator and single prec clone.
*** Mixed precision CG into EOFA portion
*** Further reduce precision in forces to 10^-6 ?
*** Overall: a 3x or so is still possible => 500s -> 160s and 20 traj per hour on 16^3.
- Use mixed precision CG in HMC
- SchurRedBlack.h: stop use of operator function; use LinearOperator or similar instead.
- Or make an OperatorFunction for mixed precision as a wrapper
********************************************************************
* Signed off 2+1f HMC with Hasenbusch and strange RHMC 16^3 x 32 DWF Ls=16 Plaquette 0.5883 ish
* Signed off 2+1f HMC with Hasenbusch and strange EOFA 16^3 x 32 DWF Ls=16 Plaquette 0.5883 ish
* Wilson plaquette cross checked against CPS and literature GwilsonFnone
********************************************************************
********************************************************************
* RHMC: Timesteps & eigenranges matched from previous CPS 16^3 x 32 runs:
********************************************************************
****
Strange (m=0.04) has eigenspan
****
16^3 done as 1+1+1 with separate PV's.
/dirac1/archive/QCDOC/host/QCDDWF/DWF/2+1f/16nt32/IWASAKI/b2.13/ls16/M1_8/ms0.04/mu0.01/rhmc_multitimescale/evol5/work
****
2+1f 16^3 - [ 4e-4, 2.42 ] for strange
****
24^3 done as 1+1+1 at strange, and single quotient https://arxiv.org/pdf/0804.0473.pdf Eq 83,
****
double lambda_low = 4.0000000000000002e-04 <- strange
double lambda_low = 1.0000000000000000e-02 <- pauli villars
And high = 2.5
Array bsn_mass[3] = {
double bsn_mass[0] = 1.0000000000000000e+00
double bsn_mass[1] = 1.0000000000000000e+00
double bsn_mass[2] = 1.0000000000000000e+00
}
Array frm_mass[3] = {
double frm_mass[0] = 4.0000000000000001e-02
double frm_mass[1] = 4.0000000000000001e-02
double frm_mass[2] = 4.0000000000000001e-02
}
***
32^3
/dirac1/archive/QCDOC/host/QCDDWF/DWF/2+1f/32nt64/IWASAKI/b2.25/ls16/M1_8/ms0.03/mu0.004/evol6/work
***
Similar det scheme
double lambda_low = 4.0000000000000002e-04
double lambda_low = 1.0000000000000000e-02
Array bsn_mass[3] = {
double bsn_mass[0] = 1.0000000000000000e+00
double bsn_mass[1] = 1.0000000000000000e+00
double bsn_mass[2] = 1.0000000000000000e+00
}
Array frm_mass[3] = {
double frm_mass[0] = 3.0000000000000002e-02
double frm_mass[1] = 3.0000000000000002e-02
double frm_mass[2] = 3.0000000000000002e-02
}
********************************************************************
* Grid: Power method bounds check
********************************************************************
- Finding largest eigenvalue approx 25 not 2.5
- Conventions:
Grid MpcDagMpc based on:
(Moo-Moe Mee^-1 Meo)^dag(Moo-Moe Mee^-1 Meo)
- with Moo = 5-M5 = 3.2
- CPS use(d) Moo = 1
- Eigenrange in Grid is 3.2^2 rescaled so factor of 10 accounted for

View File

@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/A2AMatrix.hpp
Copyright (C) 2015-2018
Copyright (C) 2015-2019
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -29,38 +30,394 @@ See the full license in the file "LICENSE" in the top level distribution directo
#define A2A_Matrix_hpp_
#include <Hadrons/Global.hpp>
#include <Hadrons/TimerArray.hpp>
#include <Grid/Eigen/unsupported/CXX11/Tensor>
#ifdef USE_MKL
#include "mkl.h"
#include "mkl_cblas.h"
#endif
#ifndef HADRONS_A2AM_NAME
#define HADRONS_A2AM_NAME "a2aMatrix"
#endif
#ifndef HADRONS_A2AM_IO_TYPE
#define HADRONS_A2AM_IO_TYPE ComplexF
#endif
#define HADRONS_A2AM_PARALLEL_IO
BEGIN_HADRONS_NAMESPACE
template <typename T, typename MetadataType>
// general A2A matrix set based on Eigen tensors and Grid-allocated memory
// Dimensions:
// 0 - ext - external field (momentum, EM field, ...)
// 1 - str - spin-color structure
// 2 - t - timeslice
// 3 - i - left A2A mode index
// 4 - j - right A2A mode index
template <typename T>
using A2AMatrixSet = Eigen::TensorMap<Eigen::Tensor<T, 5, Eigen::RowMajor>>;
template <typename T>
using A2AMatrix = Eigen::Matrix<T, -1, -1, Eigen::RowMajor>;
template <typename T>
using A2AMatrixTr = Eigen::Matrix<T, -1, -1, Eigen::ColMajor>;
/******************************************************************************
* Abstract class for A2A kernels *
******************************************************************************/
// Abstract interface for a kernel that fills an A2AMatrixSet<T> from two arrays of fields.
// T is the element type of the matrix set, Field the field type of the input arrays.
// Concrete kernels must implement the call operator plus the flops/bytes estimators.
template <typename T, typename Field>
class A2AKernel
{
public:
A2AKernel(void) = default;
// virtual destructor so that kernels can be destroyed through a base pointer
virtual ~A2AKernel(void) = default;
// Compute into m from the `left` and `right` field arrays.
// NOTE(review): presumably orthogDim is the direction that is not summed over (the time
// slice index of the matrix set) and `time` returns the kernel's run time - confirm
// against a concrete implementation.
virtual void operator()(A2AMatrixSet<T> &m, const Field *left, const Field *right,
const unsigned int orthogDim, double &time) = 0;
// Cost estimates as a function of the block sizes along the i and j indices.
// NOTE(review): presumably floating point operation and memory traffic counts for one
// block, used for performance reporting - confirm units with the caller.
virtual double flops(const unsigned int blockSizei, const unsigned int blockSizej) = 0;
virtual double bytes(const unsigned int blockSizei, const unsigned int blockSizej) = 0;
};
/******************************************************************************
* Class to handle A2A matrix block HDF5 I/O *
******************************************************************************/
template <typename T>
class A2AMatrixIo
{
public:
// constructors
A2AMatrixIo(void) = default;
A2AMatrixIo(std::string filename, std::string dataname,
const unsigned int nt, const unsigned int ni,
const unsigned int nj);
const unsigned int nt, const unsigned int ni = 0,
const unsigned int nj = 0);
// destructor
~A2AMatrixIo(void) = default;
// access
unsigned int getNi(void) const;
unsigned int getNj(void) const;
unsigned int getNt(void) const;
size_t getSize(void) const;
// file allocation
template <typename MetadataType>
void initFile(const MetadataType &d, const unsigned int chunkSize);
// block I/O
void saveBlock(const T *data, const unsigned int i, const unsigned int j,
const unsigned int blockSizei, const unsigned int blockSizej);
void saveBlock(const A2AMatrixSet<T> &m, const unsigned int ext, const unsigned int str,
const unsigned int i, const unsigned int j);
template <template <class> class Vec, typename VecT>
void load(Vec<VecT> &v, double *tRead = nullptr);
private:
std::string filename_, dataname_;
unsigned int nt_, ni_, nj_;
std::string filename_{""}, dataname_{""};
unsigned int nt_{0}, ni_{0}, nj_{0};
};
template <typename T, typename MetadataType>
A2AMatrixIo<T, MetadataType>::A2AMatrixIo(std::string filename,
std::string dataname,
const unsigned int nt,
const unsigned int ni,
const unsigned int nj)
/******************************************************************************
* Wrapper for A2A matrix block computation *
******************************************************************************/
// Drives the blocked computation of an A2A matrix set and writes each block
// to disk.
//   T            - scalar type used by the kernel
//   Field        - lattice field type of the left/right vectors
//   MetadataType - metadata object handed to A2AMatrixIo::initFile
//   TIo          - scalar type of the I/O buffer (defaults to T)
template <typename T, typename Field, typename MetadataType, typename TIo = T>
class A2AMatrixBlockComputation
{
private:
// One I/O task: the file handler, its metadata, the (ext, str) pair it
// stores and the (i, j) origin of the block being written.
struct IoHelper
{
A2AMatrixIo<TIo> io;
MetadataType md;
unsigned int e, s, i, j;
};
// Both callbacks are invoked with (ext index, str index).
typedef std::function<std::string(const unsigned int, const unsigned int)> FilenameFn;
typedef std::function<MetadataType(const unsigned int, const unsigned int)> MetadataFn;
public:
// constructor
// next/nstr are the sizes of the "ext" and "str" dimensions, blockSize the
// I/O block size and cacheBlockSize the size of the blocks handed to the
// kernel. tArray is optional; timers are skipped when it is null.
A2AMatrixBlockComputation(GridBase *grid,
const unsigned int orthogDim,
const unsigned int next,
const unsigned int nstr,
const unsigned int blockSize,
const unsigned int cacheBlockSize,
TimerArray *tArray = nullptr);
// execution
// ionameFn yields the dataset name and filenameFn the file name passed to
// A2AMatrixIo; metadataFn yields the metadata stored at file creation.
void execute(const std::vector<Field> &left,
const std::vector<Field> &right,
A2AKernel<T, Field> &kernel,
const FilenameFn &ionameFn,
const FilenameFn &filenameFn,
const MetadataFn &metadataFn);
private:
// I/O handler
void saveBlock(const A2AMatrixSet<TIo> &m, IoHelper &h);
private:
// NOTE: members are initialised in the order declared here, not in the
// order written in the constructor's initialiser list.
TimerArray *tArray_;
GridBase *grid_;
unsigned int orthogDim_, nt_, next_, nstr_, blockSize_, cacheBlockSize_;
// kernel output buffer, sized for one cache block
Vector<T> mCache_;
// I/O buffer, sized for one full block
Vector<TIo> mBuf_;
// I/O tasks assigned to this rank for the current block
std::vector<IoHelper> nodeIo_;
};
/******************************************************************************
* A2A matrix contraction kernels *
******************************************************************************/
// Static helpers for contracting A2A matrices (Eigen matrices). When USE_MKL
// is defined the dot products and matrix products are delegated to CBLAS.
class A2AContraction
{
public:
// accTrMul(acc, a, b): acc += tr(a*b)
// The loop direction is chosen from the storage orders: rows of a against
// columns of b when a is row-major and b is column-major, otherwise columns
// of a against rows of b.
template <typename C, typename MatLeft, typename MatRight>
static inline void accTrMul(C &acc, const MatLeft &a, const MatRight &b)
{
if ((MatLeft::Options == Eigen::RowMajor) and
(MatRight::Options == Eigen::ColMajor))
{
parallel_for (unsigned int r = 0; r < a.rows(); ++r)
{
C tmp;
#ifdef USE_MKL
dotuRow(tmp, r, a, b);
#else
// Eigen's dot() conjugates its left operand, so the explicit conjugate()
// yields the plain (unconjugated) product sum needed for the trace.
tmp = a.row(r).conjugate().dot(b.col(r));
#endif
// acc is shared between iterations: serialise the accumulation
parallel_critical
{
acc += tmp;
}
}
}
else
{
parallel_for (unsigned int c = 0; c < a.cols(); ++c)
{
C tmp;
#ifdef USE_MKL
dotuCol(tmp, c, a, b);
#else
// same conjugation cancellation as in the row-wise branch
tmp = a.col(c).conjugate().dot(b.row(c));
#endif
parallel_critical
{
acc += tmp;
}
}
}
}
// Flop estimate for accTrMul: 8 flops per element of a (presumably one
// complex multiply and one complex add each). b is not used.
template <typename MatLeft, typename MatRight>
static inline double accTrMulFlops(const MatLeft &a, const MatRight &b)
{
double n = a.rows()*a.cols();
return 8.*n;
}
// mul(res, a, b): res = a*b
#ifdef USE_MKL
// Double precision product through cblas_zgemm. res is resized when its
// shape does not match; the leading dimensions follow the storage order.
template <template <class, int...> class Mat, int... Opts>
static inline void mul(Mat<ComplexD, Opts...> &res,
const Mat<ComplexD, Opts...> &a,
const Mat<ComplexD, Opts...> &b)
{
static const ComplexD one(1., 0.), zero(0., 0.);
if ((res.rows() != a.rows()) or (res.cols() != b.cols()))
{
res.resize(a.rows(), b.cols());
}
if (Mat<ComplexD, Opts...>::Options == Eigen::RowMajor)
{
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
a.cols(), &one, a.data(), a.cols(), b.data(), b.cols(), &zero,
res.data(), res.cols());
}
else if (Mat<ComplexD, Opts...>::Options == Eigen::ColMajor)
{
cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
a.cols(), &one, a.data(), a.rows(), b.data(), b.rows(), &zero,
res.data(), res.rows());
}
}
// Single precision counterpart of the overload above, through cblas_cgemm.
template <template <class, int...> class Mat, int... Opts>
static inline void mul(Mat<ComplexF, Opts...> &res,
const Mat<ComplexF, Opts...> &a,
const Mat<ComplexF, Opts...> &b)
{
static const ComplexF one(1., 0.), zero(0., 0.);
if ((res.rows() != a.rows()) or (res.cols() != b.cols()))
{
res.resize(a.rows(), b.cols());
}
if (Mat<ComplexF, Opts...>::Options == Eigen::RowMajor)
{
cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
a.cols(), &one, a.data(), a.cols(), b.data(), b.cols(), &zero,
res.data(), res.cols());
}
else if (Mat<ComplexF, Opts...>::Options == Eigen::ColMajor)
{
cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
a.cols(), &one, a.data(), a.rows(), b.data(), b.rows(), &zero,
res.data(), res.rows());
}
}
#else
// Fallback without MKL: plain Eigen product.
template <typename Mat>
static inline void mul(Mat &res, const Mat &a, const Mat &b)
{
res = a*b;
}
#endif
// Flop estimate for mul, computed from the shape of a only:
// rows^2 * (6*cols + 2*(cols - 1)). NOTE(review): using rows^2 for the
// number of output elements presumably assumes a square result - confirm.
template <typename Mat>
static inline double mulFlops(const Mat &a, const Mat &b)
{
double nr = a.rows(), nc = a.cols();
return nr*nr*(6.*nc + 2.*(nc - 1.));
}
private:
// Compute start pointers and strides to walk row aRow of a and column aRow
// of b through raw storage, according to each matrix's storage order.
// This helper is compiled regardless of USE_MKL.
template <typename C, typename MatLeft, typename MatRight>
static inline void makeDotRowPt(C * &aPt, unsigned int &aInc, C * &bPt,
unsigned int &bInc, const unsigned int aRow,
const MatLeft &a, const MatRight &b)
{
if (MatLeft::Options == Eigen::RowMajor)
{
aPt = a.data() + aRow*a.cols();
aInc = 1;
}
else if (MatLeft::Options == Eigen::ColMajor)
{
aPt = a.data() + aRow;
aInc = a.rows();
}
if (MatRight::Options == Eigen::RowMajor)
{
bPt = b.data() + aRow;
bInc = b.cols();
}
else if (MatRight::Options == Eigen::ColMajor)
{
bPt = b.data() + aRow*b.rows();
bInc = 1;
}
}
#ifdef USE_MKL
// Same as makeDotRowPt but for column aCol of a and row aCol of b.
template <typename C, typename MatLeft, typename MatRight>
static inline void makeDotColPt(C * &aPt, unsigned int &aInc, C * &bPt,
unsigned int &bInc, const unsigned int aCol,
const MatLeft &a, const MatRight &b)
{
if (MatLeft::Options == Eigen::RowMajor)
{
aPt = a.data() + aCol;
aInc = a.cols();
}
else if (MatLeft::Options == Eigen::ColMajor)
{
aPt = a.data() + aCol*a.rows();
aInc = 1;
}
if (MatRight::Options == Eigen::RowMajor)
{
bPt = b.data() + aCol*b.cols();
bInc = 1;
}
else if (MatRight::Options == Eigen::ColMajor)
{
bPt = b.data() + aCol;
bInc = b.rows();
}
}
// res = (row aRow of a) . (column aRow of b), single precision, via
// cblas_cdotu_sub over a.cols() elements.
template <typename MatLeft, typename MatRight>
static inline void dotuRow(ComplexF &res, const unsigned int aRow,
const MatLeft &a, const MatRight &b)
{
const ComplexF *aPt, *bPt;
unsigned int aInc, bInc;
makeDotRowPt(aPt, aInc, bPt, bInc, aRow, a, b);
cblas_cdotu_sub(a.cols(), aPt, aInc, bPt, bInc, &res);
}
// res = (column aCol of a) . (row aCol of b), single precision, via
// cblas_cdotu_sub over a.rows() elements.
template <typename MatLeft, typename MatRight>
static inline void dotuCol(ComplexF &res, const unsigned int aCol,
const MatLeft &a, const MatRight &b)
{
const ComplexF *aPt, *bPt;
unsigned int aInc, bInc;
makeDotColPt(aPt, aInc, bPt, bInc, aCol, a, b);
cblas_cdotu_sub(a.rows(), aPt, aInc, bPt, bInc, &res);
}
// Double precision counterpart of dotuRow, via cblas_zdotu_sub.
template <typename MatLeft, typename MatRight>
static inline void dotuRow(ComplexD &res, const unsigned int aRow,
const MatLeft &a, const MatRight &b)
{
const ComplexD *aPt, *bPt;
unsigned int aInc, bInc;
makeDotRowPt(aPt, aInc, bPt, bInc, aRow, a, b);
cblas_zdotu_sub(a.cols(), aPt, aInc, bPt, bInc, &res);
}
// Double precision counterpart of dotuCol, via cblas_zdotu_sub.
template <typename MatLeft, typename MatRight>
static inline void dotuCol(ComplexD &res, const unsigned int aCol,
const MatLeft &a, const MatRight &b)
{
const ComplexD *aPt, *bPt;
unsigned int aInc, bInc;
makeDotColPt(aPt, aInc, bPt, bInc, aCol, a, b);
cblas_zdotu_sub(a.rows(), aPt, aInc, bPt, bInc, &res);
}
#endif
};
/******************************************************************************
* A2AMatrixIo template implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename T>
A2AMatrixIo<T>::A2AMatrixIo(std::string filename, std::string dataname,
const unsigned int nt, const unsigned int ni,
const unsigned int nj)
: filename_(filename), dataname_(dataname)
, nt_(nt), ni_(ni), nj_(nj)
{}
template <typename T, typename MetadataType>
void A2AMatrixIo<T, MetadataType>::initFile(const MetadataType &d, const unsigned int chunkSize)
// access //////////////////////////////////////////////////////////////////////
// Number of timeslices.
template <typename T>
unsigned int A2AMatrixIo<T>::getNt(void) const
{
return nt_;
}
// Size of the left (i) mode index. May be 0 until load() has read it from
// the file.
template <typename T>
unsigned int A2AMatrixIo<T>::getNi(void) const
{
return ni_;
}
// Size of the right (j) mode index. May be 0 until load() has read it from
// the file.
template <typename T>
unsigned int A2AMatrixIo<T>::getNj(void) const
{
return nj_;
}
// Total size in bytes of the nt x ni x nj matrix set.
template <typename T>
size_t A2AMatrixIo<T>::getSize(void) const
{
    // Widen before multiplying: the dimensions are 32-bit unsigned and their
    // product can exceed 2^32 for realistic matrix sizes.
    return static_cast<size_t>(nt_)*ni_*nj_*sizeof(T);
}
// file allocation /////////////////////////////////////////////////////////////
template <typename T>
template <typename MetadataType>
void A2AMatrixIo<T>::initFile(const MetadataType &d, const unsigned int chunkSize)
{
#ifdef HAVE_HDF5
std::vector<hsize_t> dim = {static_cast<hsize_t>(nt_),
@ -80,26 +437,28 @@ void A2AMatrixIo<T, MetadataType>::initFile(const MetadataType &d, const unsigne
}
// create the dataset
Hdf5Reader reader(filename_);
Hdf5Reader reader(filename_, false);
push(reader, dataname_);
auto &group = reader.getGroup();
plist.setChunk(chunk.size(), chunk.data());
dataset = group.createDataSet("data", Hdf5Type<T>::type(), dataspace, plist);
plist.setFletcher32();
dataset = group.createDataSet(HADRONS_A2AM_NAME, Hdf5Type<T>::type(), dataspace, plist);
#else
HADRONS_ERROR(Implementation, "all-to-all matrix I/O needs HDF5 library");
#endif
}
template <typename T, typename MetadataType>
void A2AMatrixIo<T, MetadataType>::saveBlock(const T *data,
const unsigned int i,
const unsigned int j,
const unsigned int blockSizei,
const unsigned int blockSizej)
// block I/O ///////////////////////////////////////////////////////////////////
template <typename T>
void A2AMatrixIo<T>::saveBlock(const T *data,
const unsigned int i,
const unsigned int j,
const unsigned int blockSizei,
const unsigned int blockSizej)
{
#ifdef HAVE_HDF5
Hdf5Reader reader(filename_);
Hdf5Reader reader(filename_, false);
std::vector<hsize_t> count = {nt_, blockSizei, blockSizej},
offset = {0, static_cast<hsize_t>(i),
static_cast<hsize_t>(j)},
@ -111,7 +470,7 @@ void A2AMatrixIo<T, MetadataType>::saveBlock(const T *data,
push(reader, dataname_);
auto &group = reader.getGroup();
dataset = group.openDataSet("data");
dataset = group.openDataSet(HADRONS_A2AM_NAME);
dataspace = dataset.getSpace();
dataspace.selectHyperslab(H5S_SELECT_SET, count.data(), offset.data(),
stride.data(), block.data());
@ -121,6 +480,267 @@ void A2AMatrixIo<T, MetadataType>::saveBlock(const T *data,
#endif
}
// Write the (ext, str) slice of the block tensor m at block origin (i, j).
// The block sizes are taken from the last two dimensions of m; the slice is
// located by its linear offset into m's row-major storage.
template <typename T>
void A2AMatrixIo<T>::saveBlock(const A2AMatrixSet<T> &m,
                               const unsigned int ext, const unsigned int str,
                               const unsigned int i, const unsigned int j)
{
    unsigned int blockSizei = m.dimension(3);
    unsigned int blockSizej = m.dimension(4);
    unsigned int nstr       = m.dimension(1);
    // Compute the offset in size_t: in 32-bit arithmetic the product wraps
    // for large blocks and the wrong slice would be written.
    size_t       offset     = (static_cast<size_t>(ext)*nstr + str)
                              *nt_*blockSizei*blockSizej;

    saveBlock(m.data() + offset, i, j, blockSizei, blockSizej);
}
// Read the whole matrix set from the HDF5 file, one timeslice at a time,
// into v[t] (converted to VecT).
//   v     - indexable container of matrices; it must already hold at least
//           nt entries, this function does not resize it
//   tRead - optional; when non-null it receives the time spent in the HDF5
//           read calls, measured with usecond()
// If ni or nj is zero they are taken from the file; otherwise the file
// dimensions must match nt x ni x nj. Raises a Size error on mismatch.
template <typename T>
template <template <class> class Vec, typename VecT>
void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead)
{
#ifdef HAVE_HDF5
    Hdf5Reader           reader(filename_);
    std::vector<hsize_t> hdim;
    H5NS::DataSet        dataset;
    H5NS::DataSpace      dataspace;
    H5NS::CompType       datatype;

    push(reader, dataname_);
    auto &group = reader.getGroup();
    dataset     = group.openDataSet(HADRONS_A2AM_NAME);
    datatype    = dataset.getCompType();
    dataspace   = dataset.getSpace();
    hdim.resize(dataspace.getSimpleExtentNdims());
    dataspace.getSimpleExtentDims(hdim.data());
    // hdim[0..2] are indexed below: refuse datasets of any other rank
    if (hdim.size() != 3)
    {
        HADRONS_ERROR(Size, "all-to-all matrix dataset has rank "
                      + std::to_string(hdim.size()) + ", expected 3");
    }
    // test each dimension rather than their 32-bit product, which can wrap
    // to zero for large matrices
    if (((nt_ != 0) and (ni_ != 0) and (nj_ != 0)) and
        ((hdim[0] != nt_) or (hdim[1] != ni_) or (hdim[2] != nj_)))
    {
        HADRONS_ERROR(Size, "all-to-all matrix size mismatch (got "
            + std::to_string(hdim[0]) + "x" + std::to_string(hdim[1]) + "x"
            + std::to_string(hdim[2]) + ", expected "
            + std::to_string(nt_) + "x" + std::to_string(ni_) + "x"
            + std::to_string(nj_) + ")");
    }
    else if ((ni_ == 0) or (nj_ == 0))
    {
        if (hdim[0] != nt_)
        {
            HADRONS_ERROR(Size, "all-to-all time size mismatch (got "
                + std::to_string(hdim[0]) + ", expected "
                + std::to_string(nt_) + ")");
        }
        // mode dimensions not given by the caller: take them from the file
        ni_ = hdim[1];
        nj_ = hdim[2];
    }

    A2AMatrix<T>         buf(ni_, nj_);
    std::vector<hsize_t> count    = {1, static_cast<hsize_t>(ni_),
                                     static_cast<hsize_t>(nj_)},
                         stride   = {1, 1, 1},
                         block    = {1, 1, 1},
                         memCount = {static_cast<hsize_t>(ni_),
                                     static_cast<hsize_t>(nj_)};
    H5NS::DataSpace      memspace(memCount.size(), memCount.data());

    std::cout << "Loading timeslice";
    std::cout.flush();
    // tRead is optional (defaults to nullptr): only reset it when provided
    if (tRead) *tRead = 0.;
    // timeslices are read from last to first
    for (unsigned int tp1 = nt_; tp1 > 0; --tp1)
    {
        unsigned int         t      = tp1 - 1;
        std::vector<hsize_t> offset = {static_cast<hsize_t>(t), 0, 0};

        if (t % 10 == 0)
        {
            std::cout << " " << t;
            std::cout.flush();
        }
        dataspace.selectHyperslab(H5S_SELECT_SET, count.data(), offset.data(),
                                  stride.data(), block.data());
        if (tRead) *tRead -= usecond();
        dataset.read(buf.data(), datatype, memspace, dataspace);
        if (tRead) *tRead += usecond();
        v[t] = buf.template cast<VecT>();
    }
    std::cout << std::endl;
#else
    HADRONS_ERROR(Implementation, "all-to-all matrix I/O needs HDF5 library");
#endif
}
/******************************************************************************
* A2AMatrixBlockComputation template implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
// Store the computation parameters and allocate the two work buffers:
// mCache_ holds one cache block of kernel output and mBuf_ one full I/O
// block, each for every (ext, str, t) combination. nt_ is the global lattice
// extent along orthogDim.
template <typename T, typename Field, typename MetadataType, typename TIo>
A2AMatrixBlockComputation<T, Field, MetadataType, TIo>
::A2AMatrixBlockComputation(GridBase *grid,
                            const unsigned int orthogDim,
                            const unsigned int next,
                            const unsigned int nstr,
                            const unsigned int blockSize,
                            const unsigned int cacheBlockSize,
                            TimerArray *tArray)
// initialisers listed in member declaration order, which is the order in
// which they actually run
: tArray_(tArray), grid_(grid), orthogDim_(orthogDim)
, nt_(grid->GlobalDimensions()[orthogDim])
, next_(next), nstr_(nstr), blockSize_(blockSize), cacheBlockSize_(cacheBlockSize)
{
    // Sizes are computed in size_t: the 32-bit product wraps for realistic
    // parameters (e.g. 64*1*16*2048*2048 == 2^32), which would leave the
    // buffers far too small.
    const size_t nMat = static_cast<size_t>(nt_)*next_*nstr_;

    mCache_.resize(nMat*cacheBlockSize_*cacheBlockSize_);
    mBuf_.resize(nMat*blockSize_*blockSize_);
}
#define START_TIMER(name) if (tArray_) tArray_->startTimer(name)
#define STOP_TIMER(name) if (tArray_) tArray_->stopTimer(name)
#define GET_TIMER(name) ((tArray_ != nullptr) ? tArray_->getDTimer(name) : 0.)
// execution ///////////////////////////////////////////////////////////////////
// Compute the full A2A matrix set between the left and right vectors and
// write it to disk block by block.
//   left, right - the two sets of fields to contract
//   kernel      - contraction kernel, called once per cache block
//   ionameFn    - dataset name for a given (ext, str)
//   filenameFn  - file name for a given (ext, str)
//   metadataFn  - metadata for a given (ext, str)
// With HADRONS_A2AM_PARALLEL_IO the (ext, str) files are distributed
// round-robin over the ranks; otherwise every rank loops over all of them.
template <typename T, typename Field, typename MetadataType, typename TIo>
void A2AMatrixBlockComputation<T, Field, MetadataType, TIo>
::execute(const std::vector<Field> &left, const std::vector<Field> &right,
          A2AKernel<T, Field> &kernel, const FilenameFn &ionameFn,
          const FilenameFn &filenameFn, const MetadataFn &metadataFn)
{
    //////////////////////////////////////////////////////////////////////////
    // i,j   is first  loop over blockSize_ factors
    // ii,jj is second loop over cacheBlockSize_ factors for high perf contractions
    // iii,jjj are loops within cacheBlock
    // Total index is sum of these  i+ii+iii etc...
    //////////////////////////////////////////////////////////////////////////
    int    N_i = left.size();
    int    N_j = right.size();
    double flops, bytes, t_kernel;
    double nodes = grid_->NodeCount();

    // number of blocks in each direction, rounding up
    int NBlock_i = N_i/blockSize_ + (((N_i % blockSize_) != 0) ? 1 : 0);
    int NBlock_j = N_j/blockSize_ + (((N_j % blockSize_) != 0) ? 1 : 0);

    for(int i=0;i<N_i;i+=blockSize_)
    for(int j=0;j<N_j;j+=blockSize_)
    {
        // Get the W and V vectors for this block^2 set of terms
        int N_ii = MIN(N_i-i,blockSize_);
        int N_jj = MIN(N_j-j,blockSize_);
        A2AMatrixSet<TIo> mBlock(mBuf_.data(), next_, nstr_, nt_, N_ii, N_jj);

        LOG(Message) << "All-to-all matrix block "
                     << j/blockSize_ + NBlock_j*i/blockSize_ + 1
                     << "/" << NBlock_i*NBlock_j << " [" << i <<" .. "
                     << i+N_ii-1 << ", " << j <<" .. " << j+N_jj-1 << "]"
                     << std::endl;
        // Series of cache blocked chunks of the contractions within this block
        flops    = 0.0;
        bytes    = 0.0;
        t_kernel = 0.0;
        for(int ii=0;ii<N_ii;ii+=cacheBlockSize_)
        for(int jj=0;jj<N_jj;jj+=cacheBlockSize_)
        {
            // time reported by the kernel for this call (named so that the
            // timeslice index t below does not shadow it)
            double tCall;
            int N_iii = MIN(N_ii-ii,cacheBlockSize_);
            int N_jjj = MIN(N_jj-jj,cacheBlockSize_);
            A2AMatrixSet<T> mCacheBlock(mCache_.data(), next_, nstr_, nt_, N_iii, N_jjj);

            START_TIMER("kernel");
            kernel(mCacheBlock, &left[i+ii], &right[j+jj], orthogDim_, tCall);
            STOP_TIMER("kernel");
            t_kernel += tCall;
            flops    += kernel.flops(N_iii, N_jjj);
            bytes    += kernel.bytes(N_iii, N_jjj);

            // copy the cache block into its place in the I/O block
            START_TIMER("cache copy");
            parallel_for_nest5(int e =0;e<next_;e++)
            for(int s =0;s< nstr_;s++)
            for(int t =0;t< nt_;t++)
            for(int iii=0;iii< N_iii;iii++)
            for(int jjj=0;jjj< N_jjj;jjj++)
            {
                mBlock(e,s,t,ii+iii,jj+jjj) = mCacheBlock(e,s,t,iii,jjj);
            }
            STOP_TIMER("cache copy");
        }

        // perf
        LOG(Message) << "Kernel perf " << flops/t_kernel/1.0e3/nodes
                     << " Gflop/s/node " << std::endl;
        LOG(Message) << "Kernel perf " << bytes/t_kernel*1.0e6/1024/1024/1024/nodes
                     << " GB/s/node "  << std::endl;

        // IO
        double       blockSize, ioTime;
        unsigned int myRank = grid_->ThisRank(), nRank  = grid_->RankCount();

        LOG(Message) << "Writing block to disk" << std::endl;
        ioTime = -GET_TIMER("IO: write block");
        START_TIMER("IO: total");
        makeFileDir(filenameFn(0, 0), grid_);
#ifdef HADRONS_A2AM_PARALLEL_IO
        grid_->Barrier();
        // make task list for current node
        nodeIo_.clear();
        for(int f = myRank; f < next_*nstr_; f += nRank)
        {
            IoHelper h;

            h.i  = i;
            h.j  = j;
            h.e  = f/nstr_;
            h.s  = f % nstr_;
            h.io = A2AMatrixIo<TIo>(filenameFn(h.e, h.s),
                                    ionameFn(h.e, h.s), nt_, N_i, N_j);
            h.md = metadataFn(h.e, h.s);
            nodeIo_.push_back(h);
        }
        // parallel IO
        for (auto &h: nodeIo_)
        {
            saveBlock(mBlock, h);
        }
        grid_->Barrier();
#else
        // serial IO, for testing purposes only
        for(int e = 0; e < next_; e++)
        for(int s = 0; s < nstr_; s++)
        {
            IoHelper h;

            h.i  = i;
            h.j  = j;
            h.e  = e;
            h.s  = s;
            h.io = A2AMatrixIo<TIo>(filenameFn(h.e, h.s),
                                    ionameFn(h.e, h.s), nt_, N_i, N_j);
            h.md = metadataFn(h.e, h.s);
            // write the block assembled above (the buffer is named mBlock)
            saveBlock(mBlock, h);
        }
#endif
        STOP_TIMER("IO: total");
        // byte count computed in double: the 32-bit integer product can wrap
        blockSize  = static_cast<double>(next_)*nstr_*nt_*N_ii*N_jj*sizeof(TIo);
        ioTime    += GET_TIMER("IO: write block");
        LOG(Message) << "HDF5 IO done " << sizeString(blockSize) << " in "
                     << ioTime  << " us ("
                     << blockSize/ioTime*1.0e6/1024/1024
                     << " MB/s)" << std::endl;
    }
}
// I/O handler /////////////////////////////////////////////////////////////////
// Write one block through the I/O task h. The file is created only when the
// block sits at the origin (i == 0 and j == 0); every call then writes the
// (e, s) slice of m at block origin (i, j).
template <typename T, typename Field, typename MetadataType, typename TIo>
void A2AMatrixBlockComputation<T, Field, MetadataType, TIo>
::saveBlock(const A2AMatrixSet<TIo> &m, IoHelper &h)
{
    const bool atOrigin = (h.i == 0) and (h.j == 0);

    if (atOrigin)
    {
        START_TIMER("IO: file creation");
        h.io.initFile(h.md, blockSize_);
        STOP_TIMER("IO: file creation");
    }

    START_TIMER("IO: write block");
    h.io.saveBlock(m, h.e, h.s, h.i, h.j);
    STOP_TIMER("IO: write block");
}
#undef START_TIMER
#undef STOP_TIMER
#undef GET_TIMER
END_HADRONS_NAMESPACE
#endif // A2A_Matrix_hpp_

View File

@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/A2AVectors.hpp
Copyright (C) 2015-2018
Copyright (C) 2015-2019
Author: Antonin Portelli <antonin.portelli@me.com>
Author: fionnoh <fionnoh@gmail.com>
@ -36,7 +36,7 @@ See the full license in the file "LICENSE" in the top level distribution directo
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Classes to generate V & W all-to-all vectors *
* Class to generate V & W all-to-all vectors *
******************************************************************************/
template <typename FImpl>
class A2AVectorsSchurDiagTwo
@ -70,6 +70,42 @@ private:
SchurDiagTwoOperator<FMat, FermionField> op_;
};
/******************************************************************************
* Methods for V & W all-to-all vectors I/O *
******************************************************************************/
// Static helpers to write and read sets of all-to-all vectors, either as one
// file holding every vector or as a directory with one file per vector.
class A2AVectorsIo
{
public:
    // Record stored alongside each vector: its index in the set.
    struct Record: Serializable
    {
        GRID_SERIALIZABLE_CLASS_MEMBERS(Record,
                                        unsigned int, index);
        Record(void): index(0) {}
    };
public:
    template <typename Field>
    static void write(const std::string fileStem, std::vector<Field> &vec,
                      const bool multiFile, const int trajectory = -1);
    template <typename Field>
    static void read(std::vector<Field> &vec, const std::string fileStem,
                     const bool multiFile, const int trajectory = -1);
private:
    // Build the target name: stem, then ".<traj>" when traj is non-negative,
    // then ".bin" in single-file mode (in multi-file mode the result is used
    // as a directory name).
    static inline std::string vecFilename(const std::string stem, const int traj,
                                          const bool multiFile)
    {
        std::string name = stem;

        if (traj >= 0)
        {
            name += "." + std::to_string(traj);
        }
        if (!multiFile)
        {
            name += ".bin";
        }

        return name;
    }
};
/******************************************************************************
* A2AVectorsSchurDiagTwo template implementation *
******************************************************************************/
@ -217,6 +253,90 @@ void A2AVectorsSchurDiagTwo<FImpl>::makeHighModeW5D(FermionField &wout_4d,
}
}
/******************************************************************************
* all-to-all vectors I/O template implementation *
******************************************************************************/
template <typename Field>
void A2AVectorsIo::write(const std::string fileStem, std::vector<Field> &vec,
const bool multiFile, const int trajectory)
{
Record record;
GridBase *grid = vec[0]._grid;
ScidacWriter binWriter(grid->IsBoss());
std::string filename = vecFilename(fileStem, trajectory, multiFile);
if (multiFile)
{
std::string fullFilename;
for (unsigned int i = 0; i < vec.size(); ++i)
{
fullFilename = filename + "/elem" + std::to_string(i) + ".bin";
LOG(Message) << "Writing vector " << i << std::endl;
makeFileDir(fullFilename, grid);
binWriter.open(fullFilename);
record.index = i;
binWriter.writeScidacFieldRecord(vec[i], record);
binWriter.close();
}
}
else
{
makeFileDir(filename, grid);
binWriter.open(filename);
for (unsigned int i = 0; i < vec.size(); ++i)
{
LOG(Message) << "Writing vector " << i << std::endl;
record.index = i;
binWriter.writeScidacFieldRecord(vec[i], record);
}
binWriter.close();
}
}
// Read vec.size() vectors into vec, checking that the index stored with each
// one matches its position. The file layout follows multiFile: a single file
// read sequentially, or one file "<name>/elem<i>.bin" per vector.
// Raises an Io error on index mismatch.
template <typename Field>
void A2AVectorsIo::read(std::vector<Field> &vec, const std::string fileStem,
                        const bool multiFile, const int trajectory)
{
    Record             record;
    ScidacReader       binReader;
    const std::string  baseName = vecFilename(fileStem, trajectory, multiFile);

    if (!multiFile)
    {
        // single file: open once and read the records in sequence
        binReader.open(baseName);
        for (unsigned int k = 0; k < vec.size(); ++k)
        {
            LOG(Message) << "Reading vector " << k << std::endl;
            binReader.readScidacFieldRecord(vec[k], record);
            if (record.index != k)
            {
                HADRONS_ERROR(Io, "vector index mismatch");
            }
        }
        binReader.close();

        return;
    }

    // one file per vector inside the directory baseName
    for (unsigned int k = 0; k < vec.size(); ++k)
    {
        const std::string elemName = baseName + "/elem" + std::to_string(k) + ".bin";

        LOG(Message) << "Reading vector " << k << std::endl;
        binReader.open(elemName);
        binReader.readScidacFieldRecord(vec[k], record);
        binReader.close();
        if (record.index != k)
        {
            HADRONS_ERROR(Io, "vector index mismatch");
        }
    }
}
END_HADRONS_NAMESPACE
#endif // A2A_Vectors_hpp_

View File

@ -4,7 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Application.cc
Copyright (C) 2015-2018
Copyright (C) 2015-2019
Author: Antonin Portelli <antonin.portelli@me.com>
@ -48,28 +48,32 @@ Application::Application(void)
{
initLogger();
auto dim = GridDefaultLatt(), mpi = GridDefaultMpi(), loc(dim);
locVol_ = 1;
for (unsigned int d = 0; d < dim.size(); ++d)
if (dim.size())
{
loc[d] /= mpi[d];
locVol_ *= loc[d];
locVol_ = 1;
for (unsigned int d = 0; d < dim.size(); ++d)
{
loc[d] /= mpi[d];
locVol_ *= loc[d];
}
LOG(Message) << "====== HADRONS APPLICATION INITIALISATION ======" << std::endl;
LOG(Message) << "** Dimensions" << std::endl;
LOG(Message) << "Global lattice: " << dim << std::endl;
LOG(Message) << "MPI partition : " << mpi << std::endl;
LOG(Message) << "Local lattice : " << loc << std::endl;
LOG(Message) << std::endl;
LOG(Message) << "** Default parameters (and associated C macros)" << std::endl;
LOG(Message) << "ASCII output precision : " << MACOUT(DEFAULT_ASCII_PREC) << std::endl;
LOG(Message) << "Fermion implementation : " << MACOUTS(FIMPLBASE) << std::endl;
LOG(Message) << "z-Fermion implementation: " << MACOUTS(ZFIMPLBASE) << std::endl;
LOG(Message) << "Scalar implementation : " << MACOUTS(SIMPLBASE) << std::endl;
LOG(Message) << "Gauge implementation : " << MACOUTS(GIMPLBASE) << std::endl;
LOG(Message) << "Eigenvector base size : "
<< MACOUT(HADRONS_DEFAULT_LANCZOS_NBASIS) << std::endl;
LOG(Message) << "Schur decomposition : " << MACOUTS(HADRONS_DEFAULT_SCHUR) << std::endl;
LOG(Message) << std::endl;
}
LOG(Message) << "====== HADRONS APPLICATION INITIALISATION ======" << std::endl;
LOG(Message) << "** Dimensions" << std::endl;
LOG(Message) << "Global lattice: " << dim << std::endl;
LOG(Message) << "MPI partition : " << mpi << std::endl;
LOG(Message) << "Local lattice : " << loc << std::endl;
LOG(Message) << std::endl;
LOG(Message) << "** Default parameters (and associated C macros)" << std::endl;
LOG(Message) << "ASCII output precision : " << MACOUT(DEFAULT_ASCII_PREC) << std::endl;
LOG(Message) << "Fermion implementation : " << MACOUTS(FIMPLBASE) << std::endl;
LOG(Message) << "z-Fermion implementation: " << MACOUTS(ZFIMPLBASE) << std::endl;
LOG(Message) << "Scalar implementation : " << MACOUTS(SIMPLBASE) << std::endl;
LOG(Message) << "Gauge implementation : " << MACOUTS(GIMPLBASE) << std::endl;
LOG(Message) << "Eigenvector base size : "
<< MACOUT(HADRONS_DEFAULT_LANCZOS_NBASIS) << std::endl;
LOG(Message) << "Schur decomposition : " << MACOUTS(HADRONS_DEFAULT_SCHUR) << std::endl;
LOG(Message) << std::endl;
}
Application::Application(const Application::GlobalPar &par)
@ -108,10 +112,28 @@ void Application::run(void)
HADRONS_ERROR(Definition, "run id is empty");
}
LOG(Message) << "RUN ID '" << getPar().runId << "'" << std::endl;
BinaryIO::latticeWriteMaxRetry = getPar().parallelWriteMaxRetry;
LOG(Message) << "Attempt(s) for resilient parallel I/O: "
<< BinaryIO::latticeWriteMaxRetry << std::endl;
vm().setRunId(getPar().runId);
vm().printContent();
env().printContent();
schedule();
if (getPar().saveSchedule or getPar().scheduleFile.empty())
{
schedule();
if (getPar().saveSchedule)
{
std::string filename;
filename = (getPar().scheduleFile.empty()) ?
"hadrons.sched" : getPar().scheduleFile;
saveSchedule(filename);
}
}
else
{
loadSchedule(getPar().scheduleFile);
}
printSchedule();
if (!getPar().graphFile.empty())
{
@ -158,12 +180,13 @@ void Application::parseParameterFile(const std::string parameterFileName)
pop(reader);
}
void Application::saveParameterFile(const std::string parameterFileName)
void Application::saveParameterFile(const std::string parameterFileName, unsigned int prec)
{
LOG(Message) << "Saving application to '" << parameterFileName << "'..." << std::endl;
if (env().getGrid()->IsBoss())
{
XmlWriter writer(parameterFileName);
writer.setPrecision(prec);
ObjectId id;
const unsigned int nMod = vm().getNModule();

Some files were not shown because too many files have changed in this diff Show More