1
0
mirror of https://github.com/paboyle/Grid.git synced 2026-04-18 17:56:12 +01:00

Compare commits

...

824 Commits

Author SHA1 Message Date
Azusa Yamaguchi bb7378cfc3 Schur for staggered 2017-10-10 12:02:18 +01:00
Azusa Yamaguchi f0e084a88c Schur staggered 2017-10-10 10:00:43 +01:00
Azusa Yamaguchi 09f4cdb11e Merge branch 'develop' of https://github.com/paboyle/Grid into feature/staggering 2017-10-04 10:51:16 +01:00
Azusa Yamaguchi 1e54882f71 Stagger 2017-10-04 10:51:06 +01:00
paboyle d54807b8c0 MPIT works with split grid now 2017-10-02 23:14:56 +01:00
paboyle 5625b47c7d Merge branch 'feature/dwf-multirhs' into develop 2017-10-02 12:42:32 +01:00
paboyle 1edcf902b7 Macos ANON 2017-10-02 12:41:02 +01:00
paboyle e5c19e1fd7 RB constructor change 2017-10-02 12:25:52 +01:00
paboyle a11d0a33d1 Merge branch 'feature/dwf-multirhs' of https://github.com/paboyle/Grid into feature/dwf-multirhs 2017-10-02 11:42:07 +01:00
paboyle 4f8b6f26b4 Merge branch 'develop' into feature/dwf-multirhs 2017-10-02 11:41:49 +01:00
paboyle 073525c5b3 Small patch from cori 2017-10-02 03:38:21 -07:00
Azusa Yamaguchi eb6153080a Merge branch 'develop' of https://github.com/paboyle/Grid into feature/staggering 2017-10-02 08:56:33 +01:00
Guido Cossu f7072d1ac2 Solving an annoying compilation error in json 2017-10-02 07:13:40 +01:00
paboyle fddeb29d6b Bug fix with spreadout FFT 2017-09-21 11:10:08 +01:00
paboyle a9ec5cf564 Christoph bug report integrate 2017-09-21 10:32:41 +01:00
Peter Boyle 946a8671b9 Merge pull request #129 from djm2131/feature/eofa
Add support for DWF with the exact one flavor algorithm
2017-09-21 10:15:21 +01:00
Peter Boyle 771a1b8e79 Merge pull request #128 from paboyle/feature/CG-reliable-update
Feature/cg reliable update
2017-09-21 10:12:03 +01:00
Peter Boyle bfb68e6f02 Merge pull request #130 from giltirn/gparity-handunroll
Gparity handunroll
2017-09-21 10:11:00 +01:00
paboyle 18c335198a Merge branch 'hotfix/dirac-ITT-fix1' into develop 2017-09-16 18:19:02 +01:00
paboyle 17c5b0f152 Patching comparison point 2017-09-16 18:18:07 +01:00
paboyle 5918769f97 Subtle Naik term bug updated in Stencil; less on logical && with a function call on right 2017-09-16 12:51:26 +01:00
Guido Cossu bbaf1ada91 Merge branch 'feature/json-fix' into develop 2017-09-08 16:02:08 +01:00
Guido Cossu 1950ac9294 Fixed the Intel compiler problem with the JSON classes 2017-09-08 15:18:59 +01:00
Guido Cossu 13fa70ac1a Merge branch 'develop' into feature/json-fix 2017-09-08 13:42:20 +01:00
Guido Cossu 7cb2b11f26 Fixing Intel compiler error for the JSON parser 2017-09-08 13:41:53 +01:00
Guido Cossu 1184ed29ae Merge pull request #124 from nmeyer-ur/feature/arm-neon
Added integer reduce functionality
2017-09-08 10:54:35 +02:00
paboyle 203c7bf6fa Merge branch 'hotfix/dirac-ITT-fix' into develop 2017-09-05 15:08:51 +01:00
paboyle c709883f3f Merge branch 'hotfix/dirac-ITT-fix' 2017-09-05 15:08:16 +01:00
paboyle aed5de4d50 Patching macos compile 2017-09-05 15:07:07 +01:00
paboyle ba27cc6571 Mac os happiness 2017-09-05 15:00:16 +01:00
paboyle d856327250 Merge branch 'release/dirac-ITT' into develop 2017-09-05 14:56:12 +01:00
paboyle d75369cb56 Merge branch 'release/dirac-ITT' 2017-09-05 14:55:54 +01:00
Peter Boyle bf973d0d56 SHM complete 2017-09-05 14:30:29 +01:00
Peter Boyle 837bf8a5be Updating to control the SHM allocation scheme under configure time options 2017-09-05 12:51:02 +01:00
Peter Boyle c05b2199f6 Improvements to huge memory 2017-09-04 10:41:21 -04:00
Azusa Yamaguchi a5fe07c077 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-09-04 14:10:15 +01:00
Azusa Yamaguchi b83b2b1415 Stability improvement to BCG. Force m_rr hermitian beyond rounding. 2017-09-04 14:09:47 +01:00
Peter Boyle b331be9101 Better reporting 2017-08-31 11:32:57 +01:00
Peter Boyle 49c20a9fa8 Patch to reporting 2017-08-31 11:32:21 +01:00
paboyle 7359df3501 Full reporting for benchmark; save robustness factor 2017-08-31 10:42:35 +01:00
Christopher Kelly 59bd1fe21b Fix for 'perm' and 'local' not being set for hand-unrolled external-site Dslash, which caused incorrect behavior of G-parity kernel 2017-08-29 13:07:37 -07:00
Nils Meyer 4e907fef2c Merge remote-tracking branch 'grid/develop' into feature/arm-neon 2017-08-29 17:47:36 +02:00
Christopher Kelly 67888b657f Merge branch 'gparity-handunroll' of https://github.com/giltirn/Grid into gparity-handunroll 2017-08-29 09:52:05 -04:00
Christopher Kelly 74af885d4e Removed some no-longer-needed associated with G-parity hand unrolled kernel 2017-08-29 09:50:37 -04:00
Christopher Kelly d36d2fb40d Added ability to override default Ls in Benchmark_dwf 2017-08-28 06:53:56 -07:00
Peter Boyle 5b9267e88d Cleaner comms benchmark treatment for one node runs 2017-08-27 18:24:48 -04:00
paboyle 15fd4003ef Improving presentation of results 2017-08-27 13:46:02 +01:00
paboyle 4b4c2a715b fcntl.h needed 2017-08-26 11:38:04 +01:00
paboyle 54a5e6c1d0 Check if we get huge pages on linux. Larry Meadows piece of magic. 2017-08-25 22:36:08 +01:00
paboyle 73aeca7dea Merge branch 'feature/multi-communicator' into develop 2017-08-25 21:55:09 +01:00
paboyle ad89abb018 Fix 2017-08-25 20:43:37 +01:00
paboyle 80c5bce5bb Merge branch 'develop' into feature/multi-communicator 2017-08-25 20:21:26 +01:00
paboyle f68b5de9c8 No compile fix on Clang 2017-08-25 19:35:21 +01:00
Peter Boyle d0f3d525d5 Optimal block size for KNL 2017-08-25 19:33:54 +01:00
Christopher Kelly f365a83fae In G-parity unrolled kernel, replaced calls to permute and exchange with run-time-evaluated permute type with explicit calls to appropriate underlying functions 2017-08-25 14:24:11 -04:00
Peter Boyle 3a58217405 Updated 2017-08-25 14:29:53 +01:00
Peter Boyle c289699d9a updated from cambridge mpi3 shakeout 2017-08-25 11:41:01 +01:00
Peter Boyle c3b1263e75 Benchmark prep 2017-08-25 09:25:54 +01:00
Christopher Kelly 34a9aeb331 Reduced number of if-statement evaluations in G-parity unrolled kernel 2017-08-24 13:53:50 -07:00
portelli 102ea9ae66 CI update 2017-08-24 18:17:09 +01:00
paboyle 5fa386ddc9 FFT test compile fixed 2017-08-24 10:17:52 +01:00
Christopher Kelly edabb3577f Imported Benchmark_gparity 2017-08-23 16:54:06 -04:00
Christopher Kelly ce5df177ee Removed superfluous implementation of G-parity twist for hand-unrolled kernel from GparityWilsonImpl 2017-08-23 15:05:22 -04:00
Christopher Kelly a0bb8e5b46 Added hand-unrolled kernel implementations of all the other dslash precision / comms precision combinations with G-parity 2017-08-23 14:44:40 -04:00
Christopher Kelly 46f88e6d72 G-parity hand-unrolled intrinsics twist now uses one less permute and one less temporary 2017-08-23 13:21:10 -04:00
David Murphy dd8f1ea189 Vectorized Mobius EOFA Dperp + shift operation 2017-08-23 13:17:26 -04:00
Christopher Kelly b61835c1a5 Added inplace version of intrinsic G-parity twist to hand-unrolled kernel 2017-08-23 12:33:48 -04:00
Azusa Yamaguchi d9cd4f0273 Staggered multinode block cg debugged. Missing global sum.
Code stalls and resumes on KNL at cambridge. Curious.

CG iterations 23ms each, then 3200 ms pauses. Mean bandwidth reports
as 200MB/s. Comms dominant in the report. However, the time behaviour suggests it
is *bursty*.... Could be swap to disk?
2017-08-23 15:07:18 +01:00
David Murphy 459f70e8d4 Check-in of working Mobius EOFA class and tests 2017-08-22 22:38:30 -04:00
Christopher Kelly 061e48fd73 Replaced slow unpack-repack in G-parity BC twist with intrinsics version 2017-08-22 18:12:12 -04:00
Christopher Kelly ab50145001 Implemented first, unoptimized version of hand-unrolled G-parity kernels
Improved Test_gparity
2017-08-22 17:12:25 -04:00
paboyle b49bec0cec MAP_HUGETLB portability fix 2017-08-20 03:08:54 +01:00
paboyle ae56e556c6 finalise issue on new OPA revert 2017-08-20 02:53:12 +01:00
paboyle 1cdf999668 Moving multicommunicator into mpi3 also for threading 2017-08-20 02:39:10 +01:00
paboyle 11062fb686 Comms none fail fix 2017-08-20 01:37:07 +01:00
paboyle 383ca7d392 Switch off comms for now until feature/multi-communicator is merged 2017-08-20 01:27:48 +01:00
paboyle a446d95c33 Trying to pass TeamCity and Travis 2017-08-20 01:10:50 +01:00
paboyle be66e7dd95 Merge branch 'develop' into feature/multi-communicator 2017-08-19 23:12:38 +01:00
paboyle 6d0d064a6c Update TODO 2017-08-19 23:11:30 +01:00
paboyle bfef525ed2 New benchmark prep 2017-08-19 23:10:12 +01:00
Peter Boyle 0b0cf62193 Fix mpi 3 interface change 2017-08-19 13:18:50 -04:00
Peter Boyle 7d88198387 Merge branch 'develop' into feature/multi-communicator 2017-08-19 13:03:35 -04:00
Peter Boyle 2f619482b8 Enable blocking stencil send 2017-08-19 12:53:59 -04:00
Peter Boyle d6472eda8d Use mmap 2017-08-19 12:53:18 -04:00
Peter Boyle 9e658de238 Use Vector 2017-08-19 12:52:44 -04:00
Peter Boyle bcefdd7c4e Align both allocator calls to 2MB 2017-08-19 12:49:02 -04:00
David Murphy 9d45fca8bc Implement MobiusEOFAFermioncache.cc 2017-08-17 23:45:36 -04:00
David Murphy ac9e6b63c0 More re-import of Mobius EOFA 2017-08-17 19:28:53 -04:00
David Murphy e140b3f802 Beginning to re-import Mobius EOFA 2017-08-16 23:36:23 -04:00
David Murphy d9d3d30cc7 Minor clean-up 2017-08-16 20:57:51 -04:00
David Murphy 47a12ec7b5 Implement EOFA pseudofermion force and Shamir tests for G-parity and non G-parity cases 2017-08-16 19:50:08 -04:00
David Murphy ec1e2f7a40 Add (mostly implemented) ExactOneFlavourRatio pseudofermion class and tests of Shamir heatbath and action 2017-08-16 12:38:59 -04:00
David Murphy 41f73ec083 Add ChronoForecast class for forecasting solutions across poles in the EOFA heatbath 2017-08-16 12:37:38 -04:00
Guido Cossu fd367d8bfd Debugging the PointerCache 2017-08-16 09:42:57 +01:00
David Murphy 6d0786ff9d Typo fixes and check-in of G-parity action test for DWF 2017-08-15 22:47:00 -04:00
David Murphy b7f93aeb4d Change CayleyFermion5D::SetCoefficientsInternal to virtual to allow overriding in derived EOFA classes 2017-08-15 14:18:51 -04:00
David Murphy 202a7fe900 Re-import DWF and abstract base EOFA fermion classes and tests 2017-08-15 13:36:08 -04:00
Guido Cossu 8a3fe60a27 Added more asserts at grid creation time 2017-08-08 11:36:20 +01:00
Guido Cossu 44051aecd1 Checking for integer divisions in cartesian full 2017-08-08 10:31:12 +01:00
Guido Cossu 06e6f8de00 Check that the reduced dim is an integer 2017-08-08 10:22:12 +01:00
Guido Cossu dbe4d7850c Make a test file compatible with all architectures 2017-08-06 10:49:45 +01:00
Guido Cossu 4fe182e5a7 Added high level HMC support for overriding default SIMD lane decomposition 2017-08-06 10:46:19 +01:00
Guido Cossu 175f393f9d Binary IO error checking 2017-08-04 12:14:10 +01:00
Christopher Kelly 7d867a8134 Merge branch 'develop' into feature/CG-reliable-update 2017-08-02 09:48:04 -04:00
Christopher Kelly 9939b267d2 Added switching to fallback linear operator in reliable update CG, and added recalculation of b parameter on update. 2017-07-31 13:39:44 -04:00
Peter Boyle 14d53e1c9e Threaded MPI calls patches 2017-07-29 13:08:10 -04:00
Guido Cossu 8bd869da37 Correcting a bug in the IO routines 2017-07-27 15:12:50 +01:00
Guido Cossu c7036f6717 Adding checks for libm and libstdc++ 2017-07-27 11:15:09 +01:00
Guido Cossu c0485d799d Explicit parameter declaration in the WilsonGauge test 2017-07-26 16:26:04 +01:00
Guido Cossu 7abc5613bd Added smearing to the topological charge observable 2017-07-26 16:21:17 +01:00
Guido Cossu 237cfd11ab Solving the spurious O2 flags 2017-07-26 12:08:51 +01:00
Guido Cossu a4b7dddb67 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-07-26 12:07:38 +01:00
Guido Cossu 5696781862 Debug error in Tensor mult 2017-07-26 12:07:34 +01:00
Christopher Kelly 8f4b3049cd Merge branch 'feature/CG-reliable-update' into ckelly_develop 2017-07-25 11:55:26 -04:00
Christopher Kelly 2a6e673a91 Merge branch 'develop' into feature/CG-reliable-update 2017-07-25 11:54:43 -04:00
Christopher Kelly 9b6cde173f Merge branch 'feature/CG-reliable-update' into ckelly_develop 2017-07-25 11:51:08 -04:00
Christopher Kelly 9f280b82c4 Added mixed-precision CG with reliable updates 2017-07-25 11:30:41 -04:00
portelli c3f0889eda Merge pull request #123 from giltirn/develop
Fix for 'using namespace' in lib/qcd/utils/GaugeFix.h
2017-07-25 11:32:02 -03:00
Nils Meyer 7a53dc3715 Added integer reduce functionality 2017-07-24 11:12:59 +02:00
Christopher Kelly 0f214ad427 Moved FourierAcceleratedGaugeFixer into Grid::QCD namespace and removed 'using namespace' directives from header 2017-07-21 11:13:51 -04:00
Peter Boyle fe4912880d Update README.md 2017-07-17 09:53:07 +01:00
Peter Boyle f038c6babe Update README.md 2017-07-14 22:59:16 +01:00
Peter Boyle 169f4b2711 Update README.md 2017-07-14 22:56:06 +01:00
Peter Boyle 2d8aff36fe Update README.md 2017-07-14 22:52:16 +01:00
Guido Cossu 9fa07eecde Merge branch 'develop' into feature/json-fix 2017-07-12 15:47:22 +01:00
azusayamaguchi 659d7d1a40 For test/solver
Fixed
2017-07-12 15:01:48 +01:00
Guido Cossu f64fb7bd77 Fix gcc error on JSON compilation 2017-07-12 14:55:42 +01:00
Guido Cossu 2a35449b91 Merge branch 'develop' into feature/json-fix 2017-07-12 14:47:00 +01:00
Guido Cossu 184af5bd05 Added support for std::pair in the JSON serialiser 2017-07-12 14:44:53 +01:00
Guido Cossu 097c9637ee Fixed the JSON parsing error 2017-07-11 14:31:57 +01:00
azusayamaguchi dc6f078246 fixed the header file for mpi3 2017-07-11 14:15:08 +01:00
Peter Boyle 8a4714a4a6 Update README.md 2017-07-09 00:11:54 +01:00
Peter Boyle 40e119c61c NUMA improvements worth preserving from AMD EPYC tests 2017-07-08 22:27:11 -04:00
Guido Cossu d9593c4b81 Merge branch 'develop' into feature/json-fix 2017-07-07 14:17:50 +01:00
paboyle ac740f73ce Works on Cori 2017-07-02 16:47:58 -07:00
paboyle 75dc7794b9 Working on Cori 2017-07-02 16:47:42 -07:00
paboyle dee68fc728 IO working multiple nodes again. Strategy of all nodes writing metadata is unsafe.
Only one rank should do this. must identify this rank. Means pass communicator to the
Objects.
2017-07-02 23:33:48 +01:00
paboyle a2d3643634 Merge branch 'feature/dwf-multirhs' of https://github.com/paboyle/Grid into feature/dwf-multirhs 2017-07-02 14:59:22 -07:00
paboyle 57002924bc NERSC shakeout of this 2017-07-02 14:58:30 -07:00
Peter Boyle 7b0237b081 Update README.md 2017-07-01 10:24:41 +01:00
Peter Boyle b68ad0cc0b Update README.md 2017-07-01 10:20:07 +01:00
Peter Boyle 37263fd9b1 Update README.md 2017-07-01 10:06:24 +01:00
Peter Boyle 3d09e3e9e0 Update README.md 2017-07-01 10:05:46 +01:00
Peter Boyle 1354b46338 Update README.md 2017-07-01 10:04:32 +01:00
Peter Boyle 251a97fe1b Update README.md 2017-07-01 09:55:36 +01:00
Peter Boyle e18929eaa0 Update README.md 2017-07-01 09:53:15 +01:00
Peter Boyle f3b0a92e71 Update README.md 2017-07-01 09:48:00 +01:00
Peter Boyle a0be3f7330 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-06-30 10:53:50 +01:00
Peter Boyle b5a6e4f1fd Best option for Xeon cache blocking set 2017-06-30 10:53:22 +01:00
Peter Boyle 7a788db3dc Guard first touch 2017-06-30 10:49:08 +01:00
Peter Boyle f20eceb6cd First touch once per page in a threaded loop 2017-06-30 10:48:27 +01:00
Peter Boyle 38325ebbc6 Interleave code path; not enabled 2017-06-30 10:23:51 +01:00
Peter Boyle b73bd151bb Switch off counters by default 2017-06-30 10:16:35 +01:00
Peter Boyle 694b305cab Update to reporting 2017-06-30 10:16:13 +01:00
Peter Boyle 2d3737a133 O3, KNL 2017-06-30 10:15:59 +01:00
Peter Boyle ac1f1838bc KNL only 2017-06-30 10:15:32 +01:00
Guido Cossu 09d09d0fe5 Update README.md 2017-06-29 11:48:11 +01:00
Guido Cossu bf630a6821 README file update 2017-06-29 11:42:25 +01:00
Guido Cossu 8859a151cc Small corrections to the NEON port 2017-06-29 11:30:29 +01:00
Guido Cossu 688a39cfd9 Merge pull request #114 from nmeyer-ur/feature/arm-neon
ARM neon intrinsics support
Guido: checked and approved
2017-06-29 09:57:17 +01:00
paboyle 6f5a5cd9b3 Improved threaded comms benchmark 2017-06-28 23:27:02 +01:00
Nils Meyer 0933aeefd4 corrected Grid_neon.h 2017-06-28 20:22:22 +02:00
Peter Boyle 322f61acee Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-06-28 15:30:35 +01:00
Peter Boyle 08e04b9676 Better benchmarks 2017-06-28 15:30:06 +01:00
portelli feaa2ac947 Merge branch 'feature/scalar-hmc-update' into develop 2017-06-28 12:46:18 +01:00
portelli 07de925127 minor scalar action fixes 2017-06-28 12:45:44 +01:00
Nils Meyer a9c816a268 moved file to correct folder 2017-06-27 21:39:15 +02:00
Nils Meyer e43a8b6b8a removed comments 2017-06-27 20:58:48 +02:00
Nils Meyer bf729766dd removed collision with QPX implementation 2017-06-27 20:32:24 +02:00
Guido Cossu dafb351d38 Merge pull request #120 from paboyle/feature/scalar-hmc-update
Scalar HMC update. 
I agree with the changes.
2017-06-27 16:23:14 +01:00
portelli 0b707b861c Merge branch 'develop' into feature/scalar-hmc-update 2017-06-27 14:40:05 +01:00
portelli 15e87a4607 HDF5 IO fix 2017-06-27 14:39:27 +01:00
portelli 7d7220cbd7 scalar: lambda/4! convention 2017-06-27 14:38:45 +01:00
paboyle 54e94360ad Experimental: Multiple communicators to see if we can avoid thread locks in --enable-comms=mpit 2017-06-24 23:10:24 +01:00
portelli 0af740dc15 minor scalar HMC code improvement 2017-06-24 23:04:05 +01:00
portelli d2e8372df3 SU(N) algebra fix (was not working) 2017-06-24 23:03:39 +01:00
paboyle 869b99ec1e Threaded calls to multiple communicators 2017-06-24 10:55:54 +01:00
paboyle 4a29ab0d0a Merge branch 'feature/dwf-multirhs' of https://github.com/paboyle/Grid into feature/dwf-multirhs 2017-06-23 23:10:43 +01:00
paboyle 0165bcb58e Added an update to TODO list 2017-06-23 23:10:24 +01:00
portelli 4372d04ad4 Merge pull request #118 from Lanny91/hotfix/bgq
Hotfix/bgq
2017-06-23 16:59:27 +01:00
paboyle 349d75e483 Precision fix 2017-06-23 02:57:59 -07:00
Lanny91 56abbdf4c2 AVX512 integer reduce fix (for non-intel compiler) 2017-06-23 11:09:14 +02:00
Lanny91 af71c63f4c AVX2 fix 2017-06-23 11:03:12 +02:00
paboyle e51475703a Ticking off lots on the TODO list 2017-06-23 09:42:21 +01:00
paboyle 1feddf4ba6 const fixes 2017-06-22 19:32:41 +01:00
paboyle 600d7ddc2e Proof of concept : Multi RHS solver, running independent solves on different ranks 2017-06-22 18:54:34 +01:00
paboyle e504260f3d Able to run a test job splitting into multiple MPI subdomains. 2017-06-22 18:53:11 +01:00
Lanny91 0440d4ce66 Merge branch 'develop' of https://github.com/paboyle/Grid into hotfix/bgq 2017-06-22 17:09:42 +02:00
paboyle 5e4bea8f20 Benchmark DWF works 2017-06-22 08:38:54 +01:00
paboyle 6ebf9f15b7 Splitting communicators first cut 2017-06-22 08:14:34 +01:00
paboyle 1d7aa673a4 Include BlockCG by default 2017-06-21 21:08:53 +01:00
paboyle b9104f3072 Block CG 2017-06-21 21:08:03 +01:00
portelli b22eab8c8b Merge commit 'a7d56523abee6c9030fdd9303c79954897b1086f' into feature/hadrons 2017-06-21 18:32:48 +01:00
paboyle a7d56523ab Merge branch 'feature/lanczos-simplify' into develop 2017-06-21 14:03:20 +01:00
paboyle 9e56c65730 Updated TODO list 2017-06-21 14:02:58 +01:00
paboyle ef4f2b8c41 todo update 2017-06-21 09:22:20 +01:00
paboyle e8b95bd35b Clean up finished. Could shrink Lanczos to around 400 lines at a push 2017-06-21 02:50:09 +01:00
paboyle 7e35286860 Simplified lanczos, added Eigen diagonalisation.
Curious if we can deprecate dependencly on BLAS.
Will see when we get 48^3 running on our BG/Q port
2017-06-21 02:26:03 +01:00
paboyle 0486ff8e79 Improved the lancos 2017-06-20 18:46:01 +01:00
portelli 1e8a2e1621 various compatibility fixes after merge 2017-06-20 17:24:55 +01:00
portelli 7587df831a Merge branch 'develop' into feature/hadrons
# Conflicts:
#	lib/qcd/action/scalar/ScalarImpl.h
2017-06-20 15:50:39 +01:00
Azusa Yamaguchi e9cc21900f Block solver complete for staggered. Now stable on mass 0.003 and
gives 8x (!) speed up on Haswell laptop vs. standard CG for 8 RHS solves.

166 iterations vs. 537 iterations so algorithmic gain + 2x in flop rate gain.

Better than a slap in the face with a wet kipper.
2017-06-20 12:37:41 +01:00
Azusa Yamaguchi 0a8faac271 Fix make tests compile 2017-06-19 22:54:18 +01:00
Azusa Yamaguchi abc4de0fd2 No compile make tests fix 2017-06-19 22:03:03 +01:00
portelli b672717096 Test_serialiation update for JSON 2017-06-19 14:38:39 +01:00
portelli 284ee194b1 JSON update 2017-06-19 14:38:15 +01:00
Azusa Yamaguchi cfe3cd76d1 Block solver improvements 2017-06-19 14:04:21 +01:00
Azusa Yamaguchi 3fa5e3109f Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-06-19 14:01:44 +01:00
paboyle 8b7049f737 Improved detectino of usqcdInfo for plaq/linktr 2017-06-19 08:46:07 +01:00
paboyle c85024683e Merge branch 'feature/parallelio' into develop 2017-06-19 01:39:48 +01:00
paboyle 1300b0b04b Update to enable multiple records per file more consistent with SciDAC.
open, close, write records...
2017-06-19 01:01:48 +01:00
paboyle e6d984b484 ILDG tests 2017-06-18 00:13:22 +01:00
paboyle 1d18d95d4f Class name return 2017-06-18 00:13:03 +01:00
paboyle ae39ec85a3 ComplexField defined 2017-06-18 00:12:48 +01:00
paboyle b96daf53a0 Query tensor structures 2017-06-18 00:12:15 +01:00
paboyle 46879e1658 Complex defined in Impl even for gauge. 2017-06-18 00:11:45 +01:00
paboyle ae4de94798 SciDAC I/O support 2017-06-18 00:11:23 +01:00
paboyle 0ab555b4f5 SciDAC I/O and ILDG improvements 2017-06-18 00:11:02 +01:00
paboyle 8e9be9f84f Updates for SciDAC IO 2017-06-18 00:10:42 +01:00
paboyle d572170170 Update for SciDAC 2017-06-18 00:10:20 +01:00
portelli 81b18f843a Merge branch 'feature/scalar_adjointFT' into feature/hadrons
# Conflicts:
#	lib/qcd/action/scalar/ScalarImpl.h
2017-06-16 17:59:55 +01:00
Lanny91 a833f88c32 Added missing SIMD integer reduction implementation for AVX, AVX-512, SSE4, IMCI 2017-06-16 15:58:47 +01:00
Lanny91 07b2c1b253 Placeholder precision change functions to allow Grid to compile with QPX (warning: no actual functionality) 2017-06-16 15:04:26 +01:00
Lanny91 735cbdb983 QPX Integer reduction (+ integer reduction test) 2017-06-14 10:55:10 +01:00
Lanny91 2ad54c5a02 QPX exchange support 2017-06-14 10:53:39 +01:00
paboyle 12ccc73cf5 Serialisation no compile fix 2017-06-14 05:19:17 +01:00
Nils Meyer 3d04dc33c6 ARM neon intrinsics support 2017-06-13 13:26:59 +02:00
paboyle e7564f8330 Starting a test for reading an ILDG file. 2017-06-13 12:22:50 +01:00
paboyle 91199a8ea0 openmpi is not const safe 2017-06-13 12:21:29 +01:00
paboyle 0494feec98 Libz dependency 2017-06-13 12:00:23 +01:00
paboyle a16b1e134e gcc 4.9 fix 2017-06-13 10:48:43 +01:00
paboyle 769ad578f5 Odd new error on G++ 49 on travis 2017-06-12 00:41:21 +01:00
paboyle eaac0044b5 Compile fixes 2017-06-12 00:20:49 +01:00
paboyle 56042f002c New files 2017-06-11 23:19:20 +01:00
paboyle 3bfd1f13e6 I/O improvements 2017-06-11 23:14:10 +01:00
Azusa Yamaguchi 70ab598c96 Move gfix into utils 2017-06-08 22:22:23 +01:00
Azusa Yamaguchi 1d0ca65e28 Move Gfix into utils 2017-06-08 22:21:50 +01:00
Azusa Yamaguchi 2bc4d0a20e Move code into utils 2017-06-08 22:21:25 +01:00
portelli 2490816297 Hadrons: rare kaon program removed 2017-06-07 20:11:02 -05:00
portelli 5f55bca378 Hadrons: Quark module renamed MFermion::GaugeProp 2017-06-07 20:10:48 -05:00
portelli f6aa82b7f2 Merge branch 'develop' into feature/hadrons 2017-06-06 11:46:33 -05:00
portelli 22749699a3 Fixes after merge and point sink module 2017-06-06 11:45:30 -05:00
portelli 0503c028be Merge branch 'feature/qed-fvol' into feature/hadrons (non-trivial conflicts on scalar Impl)
# Conflicts:
#	configure.ac
#	lib/qcd/action/scalar/Scalar.h
2017-06-05 16:37:47 -05:00
paboyle 092dcd4e04 MPI I/O only if MPI compiled 2017-06-02 22:50:25 +01:00
Guido Cossu 4a8c4ccfba Test wilson flow, added maxTau for adaptive flow 2017-06-02 17:02:29 +01:00
Guido Cossu 9b44189d5a Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-06-02 16:56:00 +01:00
Guido Cossu 7da4856e8e Wilson flow with adaptive steps 2017-06-02 16:55:53 +01:00
Guido Cossu aaf1e33a77 Adding adaptive integration in the WilsonFlow 2017-06-02 16:32:35 +01:00
paboyle 094c3d091a Improved and RNG's now survive checkpoint 2017-06-02 00:38:58 +01:00
Peter Boyle 4b98e524a0 Roll over to MPI version of I/O 2017-06-01 17:38:18 -04:00
Peter Boyle 1a1f6d55f9 Roll over to MPI IO for parallel IO 2017-06-01 17:37:26 -04:00
Peter Boyle 21421656ab Big changes improving the code to use MPI IO 2017-06-01 17:36:53 -04:00
Peter Boyle 6f687a67cd As local vols increase, use 64 bits for safety 2017-06-01 17:36:18 -04:00
paboyle b30754e762 Merge branch 'feature/parallelio' of https://github.com/paboyle/Grid into feature/parallelio 2017-05-30 23:41:28 +01:00
paboyle 1e429a0d57 Added MPI version 2017-05-30 23:41:07 +01:00
paboyle d38a4de36c Beginning move to MPI IO 2017-05-30 23:40:39 +01:00
paboyle ef1b7db374 Diff comparison check 2017-05-30 23:40:11 +01:00
paboyle 53a9aeb965 Cosmetic only 2017-05-30 23:39:53 +01:00
paboyle e30fa9f4b8 RankCount; need to clean up ambigious ProcessCount 2017-05-30 23:39:16 +01:00
paboyle 58e8d0a10d reverse direction lexico mapping 2017-05-30 23:38:30 +01:00
paboyle 62cf9cf638 Cleaner code 2017-05-30 23:38:02 +01:00
paboyle 0fb458879d Precision safe compile 2017-05-30 23:37:02 +01:00
Peter Boyle 725c513d94 Better MPI3 benchmarking 2017-05-29 16:47:32 -04:00
portelli d8648307ff Merge branch 'develop' into feature/hadrons 2017-05-29 12:58:08 +01:00
portelli 064315c00b Hadrons: mesons gamma list fix 2017-05-29 12:57:33 +01:00
Guido Cossu 7c6cc85df6 Updating WilsonFlow test 2017-05-27 18:03:49 +01:00
Guido Cossu a6691ef87c Merge pull request #110 from Lanny91/feature/hadrons
Hadrons: Fermion boundary conditions can now be set in measurement code.
2017-05-26 16:43:22 +01:00
Lanny91 8e0ced627a Hadrons: Fermion boundary conditions can now be set in measurement code. 2017-05-26 15:59:15 +01:00
Guido Cossu 0de314870d Faster derivative for WilsonGauge 2017-05-26 14:31:49 +01:00
Guido Cossu ffb91e53d2 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-26 12:46:02 +01:00
Guido Cossu f4e8bf2858 Fixing the topological charge. Wilson Flow tested, ok 2017-05-26 12:45:59 +01:00
portelli a74c34315c Bootstrap script fix 2017-05-25 14:27:49 +01:00
paboyle 69470ccc10 Update to do list 2017-05-25 13:41:26 +01:00
paboyle b8b5934193 Attempts to speed up the parallel IO 2017-05-25 13:32:24 +01:00
Guido Cossu 75856f2945 Compilation fix in the Tensor_exp 2017-05-25 12:44:56 +01:00
Guido Cossu 3c112a7a25 Small correction to the general exp definition 2017-05-25 12:09:00 +01:00
Guido Cossu ab3596d4d3 Using Cayley-Hamilton form for the exponential of SU(3) matrices 2017-05-25 12:07:47 +01:00
paboyle a8c10b1933 Use a global-X x Local-Y chunksize for parallel binary I/O.
Gives O(32 x 8 x 18*8*8) chunk size on configuration I/O.

At 150KB should be getting close to packet sizes and 4MB filesystem
block sizes that are reasonably (!?) performant. We shall see once I move
this off my laptop and over to BNL and time it.
2017-05-25 11:43:33 +01:00
Guido Cossu 15e801af3f Fixing a compilation error for generic SIMD 2017-05-19 16:39:36 +01:00
Guido Cossu 0ffc235741 Adding more statistics to the Benchmark_comms. Min and max 2017-05-19 10:55:04 +01:00
Guido Cossu 8e19c99c7d Adding more statistical info in the Benchmark_comms 2017-05-18 19:07:35 +01:00
Guido Cossu a0bc0ad06f Reverting change in Bechmark_comms. Keeping 300 iterations 2017-05-18 17:48:11 +01:00
Guido Cossu a8fb2835ca Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-18 14:45:00 +01:00
Guido Cossu bc862ce3ab Fixing an allocation issue in Benchmark_comms 2017-05-18 14:44:56 +01:00
portelli 22f4feee7b Merge branch 'develop' into feature/scalar_adjointFT 2017-05-17 13:27:13 +02:00
portelli 3f858d6755 Scalar: phi^2 observable 2017-05-17 13:25:14 +02:00
paboyle 3267683e22 Union workaround for g++ 2017-05-17 11:26:18 +01:00
Azusa Yamaguchi f46a67ffb3 No compile issue on clang on mac fixed.
Compiler version was clang++-3.9 under mpicxx
2017-05-17 10:51:01 +01:00
paboyle f7b8383ef5 Half precisoin comms mixed prec test 2017-05-16 14:52:51 +01:00
Guido Cossu 10f2872aae Faster exponentiation for lattice fields 2017-05-15 15:51:16 +01:00
portelli 35fa3d1dfd Merge branch 'master' into feature/scalar_adjointFT 2017-05-12 10:41:39 +01:00
paboyle cd73897b8d Merge branch 'release/v0.7.0' into develop 2017-05-12 01:16:02 +01:00
paboyle c4435e6beb Merge branch 'release/v0.7.0' 2017-05-12 01:15:59 +01:00
paboyle 7a8f6af5f8 Drop verbose compiler predefine check 2017-05-11 12:48:40 +01:00
paboyle 49a5d9bac7 Clang major, minor trailing underscore 2017-05-11 12:25:02 +01:00
paboyle 2b3fdd4a58 Print CXX predefines 2017-05-11 12:05:50 +01:00
paboyle 34502ec471 4.8 dropped as buggy. 2017-05-11 11:43:39 +01:00
paboyle 8a43e88b4f Compiler check early in build 2017-05-11 11:43:06 +01:00
portelli d1ece74137 HMC scalar test: magnetisation measurement 2017-05-11 11:40:44 +01:00
paboyle 238df20370 Still working on the compiler compat checks 2017-05-11 11:30:14 +01:00
paboyle 97a32a6145 Add 4.8 test 2017-05-11 11:24:21 +01:00
paboyle 655492a443 Compiler detection 2017-05-11 11:21:11 +01:00
paboyle 1cab06f6bd Compat checks for compilers 2017-05-11 10:20:24 +01:00
portelli 43c817cc67 Scalar action: const fix 2017-05-11 00:07:17 +01:00
paboyle f8024c262b Update Eigen 2017-05-10 13:30:09 +01:00
Guido Cossu 4cc5f01f4a Small change in the readme about the intel compiler 2017-05-09 15:38:59 +01:00
Guido Cossu 9c12c37aaf Confirming the fix on the complex boundary conditions 2017-05-09 08:41:29 +01:00
Guido Cossu 806eaa0530 Adding back the IO tests in the list 2017-05-08 22:26:44 +01:00
Guido Cossu 01d0e54594 Merge branch 'release/v0.7.0' into develop 2017-05-08 22:02:51 +01:00
Guido Cossu 5aafa335fe Fixing JSON error for complex numbers 2017-05-08 21:56:44 +01:00
Guido Cossu 8ba0494485 Fixing JSON for complex numbers 2017-05-08 21:41:39 +01:00
Peter Boyle d99d98d9fd Merge branch 'release/v0.7.0' of https://github.com/paboyle/Grid into release/v0.7.0 2017-05-08 15:08:20 -04:00
Peter Boyle 95a017a4ae Relax force constraints to pass in single precision. 2017-05-08 15:06:41 -04:00
paboyle 92f92379e6 Adding olivers test version 2017-05-08 18:42:19 +01:00
paboyle 529e78d43f Restart the v0.7.0 release 2017-05-08 18:20:04 +01:00
paboyle 4ec746d262 Merge branch 'release/v0.7.0' into develop 2017-05-06 18:43:03 +01:00
paboyle 51bf1501fc Merge branch 'release/v0.7.0' 2017-05-06 18:42:50 +01:00
paboyle 66d819c054 More info on gcc bug 2017-05-06 18:42:11 +01:00
paboyle 3f3686f869 formattign 2017-05-06 18:41:27 +01:00
paboyle 26bb829f8c Formatting 2017-05-06 18:40:55 +01:00
paboyle 67cb04fc66 README update 2017-05-06 18:39:54 +01:00
paboyle a40bd68aed Version update 2017-05-06 17:00:14 +01:00
paboyle 36495e0fd2 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-06 16:39:27 +01:00
paboyle 93f6c15772 Warning squash 2017-05-06 16:38:58 +01:00
Peter Boyle cb93eeff21 Update README 2017-05-06 16:28:12 +01:00
paboyle c7cc7e6101 Fix 2017-05-06 16:10:09 +01:00
paboyle c349aa6511 DEFINE warning elimination 2017-05-06 16:08:35 +01:00
paboyle 3bae0a2d5c Drop a gcc warning 2017-05-06 15:51:42 +01:00
paboyle c1c7566089 GCC bug work around in 5.0 through 6.2 inclusive. 2017-05-06 15:20:25 +01:00
paboyle 2439999ec8 Warning elimination; drop to -O2 on G++ bad versions 2017-05-06 14:44:49 +01:00
paboyle 1d96f662e3 Fixed 4d fermion gparity force. Put strong tests on make check force tests 2017-05-06 00:46:31 +01:00
paboyle 41d1889941 trusty ubuntu 2017-05-05 21:25:35 +01:00
paboyle 0c3981e0c3 Trying to force recent automake 2017-05-05 21:15:22 +01:00
paboyle c727bd4609 Trying to work around automake version 2017-05-05 21:00:00 +01:00
paboyle db23749b67 Adding travis to make check 2017-05-05 20:42:08 +01:00
paboyle 751f2b9703 Better check and benchmark driving 2017-05-05 19:54:38 +01:00
Guido Cossu 741bc836f6 Exposing support for Ncolours and Ndimensions and JSON input file for the ScalarAction 2017-05-05 17:36:43 +01:00
paboyle 697c0603ce SITMO I/O for NERSC working now bit repro 2017-05-05 16:54:44 +01:00
paboyle 14bedebb11 Source pointed to 2017-05-05 16:17:27 +01:00
Guido Cossu 8546d01a4c Merge branch 'develop' into feature/scalar_adjointFT 2017-05-05 15:47:33 +01:00
paboyle 47b5c07ffb Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-05 14:27:02 +01:00
Guido Cossu da86a2bf54 Merge branch 'feature/hmc_generalise' into develop 2017-05-05 14:23:02 +01:00
paboyle c1cb60a0b3 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-05 14:22:37 +01:00
Guido Cossu 5ed5b4bfbf Merge branch 'develop' into feature/hmc_generalise 2017-05-05 14:22:33 +01:00
Guido Cossu de84aacdfd Fixing a configure error for the smearing tests 2017-05-05 13:59:10 +01:00
paboyle 2888003765 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-05 13:02:24 +01:00
paboyle da06bf5b95 Zmobius force test added 2017-05-05 12:52:45 +01:00
Guido Cossu 20999c1370 Merge branch 'develop' into feature/hmc_generalise 2017-05-05 12:47:17 +01:00
paboyle 33f0ed1a33 No compile fix 2017-05-05 11:04:30 +01:00
paboyle 50be56433b Delete old and defunct tests 2017-05-04 23:41:16 +01:00
paboyle 43924007db Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-04 19:53:41 +01:00
paboyle 78ef10e60f Mobius force improvement 2017-05-04 19:53:21 +01:00
portelli 679ae98b14 Merge branch 'feature/better-external-library' into develop 2017-05-04 15:42:12 +01:00
paboyle 90f6bc16bb No compile clang fix 2017-05-04 12:15:06 +01:00
Peter Boyle 9b5b639546 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-05-03 20:51:40 -04:00
Peter Boyle 945767c6d8 More info 2017-05-03 20:26:35 -04:00
Peter Boyle 422cdf4979 Some checks 2017-05-03 18:37:38 -04:00
Peter Boyle 38db174f3b Print statement 2017-05-03 18:25:26 -04:00
Peter Boyle 92e364a35f Better reporting in benchmark for MPI3 2017-05-03 15:43:36 -04:00
portelli 58299b8ba2 Git info separated from version in git-config 2017-05-02 20:04:41 +01:00
portelli 124bf4d829 git ref in config summary 2017-05-02 19:41:01 +01:00
portelli e8e56b3414 Config summary saved in git-config 2017-05-02 19:40:47 +01:00
portelli 89c430136d grid-config program 2017-05-02 19:13:13 +01:00
portelli ea9aef7baa New header for standard headers (was an issue with Remez.h and external compilation) 2017-05-02 18:26:11 +01:00
portelli c9e9e8061d Merge branch 'feature/hadrons' into develop 2017-05-02 18:23:47 +01:00
Guido Cossu 453cf2a1c6 Moving the topological charge outside the HMC related routines 2017-05-02 14:40:12 +01:00
Guido Cossu de7bbfa5f9 Adding ParameterFile option for the HMC 2017-05-02 12:16:16 +01:00
portelli dda8d77c87 Merge branch 'feature/hadrons' into feature/rare_kaon 2017-05-01 17:50:57 +01:00
portelli aa29f4346a Hadrons: weird bus error with recent macOS clang 2017-05-01 17:49:08 +01:00
Guido Cossu 86116dbed6 Adding boundary condition switch (compile time) for the Mobius HMC example 2017-05-01 16:33:11 +01:00
Guido Cossu 7bd31e3f7c Adding external file support in the Mobius example (JSON) 2017-05-01 16:30:24 +01:00
Guido Cossu 74f451715f Fix for Mac compilation on the size_t uint64_t types 2017-05-01 15:12:07 +01:00
Guido Cossu 655be8ed76 Adding tests for the mobius operator 2017-05-01 14:42:16 +01:00
Guido Cossu 4063238943 Adding HMC test file example for Mobius + smearing 2017-05-01 13:44:00 +01:00
Guido Cossu 3344788fa1 Merge branch 'develop' into feature/hmc_generalise 2017-05-01 12:13:56 +01:00
Peter Boyle 99220f6531 Fixes and better timing 2017-04-26 17:24:11 -04:00
paboyle 2a6d093749 move the sudo: required to match locatoin on Guido's branch 2017-04-26 09:15:34 +01:00
paboyle c947947fad sudo required suggested by guido 2017-04-26 08:45:36 +01:00
paboyle f555b50547 Merge branch 'feature/half-prec-comms' into develop 2017-04-26 08:43:40 +01:00
paboyle 738c1a11c2 longer nloop 2017-04-26 08:43:20 +01:00
Peter Boyle f8797e1e3e bug fix. works now and great face performance 2017-04-26 03:14:02 -04:00
Peter Boyle fd1eb7de13 Clean implementation of the exterior faces listing only those points on the boudary 2017-04-26 02:34:52 -04:00
Peter Boyle 2ce898efa3 Pretty code 2017-04-26 02:34:25 -04:00
paboyle ab66bac4e6 Think I'm getting on top of the reduced cost exterior precomputed list of links 2017-04-25 08:50:26 +01:00
paboyle 56277a11c8 Build a list of whats on the surface 2017-04-24 17:06:15 +01:00
paboyle 916e9e1d3e Merge branch 'feature/half-prec-comms' of https://github.com/paboyle/Grid into feature/half-prec-comms 2017-04-24 10:39:19 +01:00
Peter Boyle 5b55867a7a Slightly cheaper Ext assembly 2017-04-24 05:36:11 -04:00
Peter Boyle 3accb1ef89 Debugged assemply split phase with interior suppression 2017-04-23 19:30:19 -04:00
Peter Boyle e3d0e31525 Debugged assemply split phase with interior suppression 2017-04-23 19:29:27 -04:00
Peter Boyle 5812eb8a8c Partially fixed. But the comms-overlap does not work yet. 2017-04-22 18:50:25 -04:00
paboyle 4dd3763294 Use OMP as much as possible 2017-04-22 20:35:20 +01:00
paboyle c429ace748 Cleaner OpenMP use 2017-04-22 20:28:42 +01:00
paboyle ac58565d0a Dangerous rewrite of the assembly. If I make a mistake the debug will be painful. 2017-04-22 19:31:04 +01:00
paboyle 3703b718aa Mark up a table if a given site only receives from itself; including MPI3 splitting info. 2017-04-22 19:28:37 +01:00
paboyle b722889234 Try a better load balancing loop 2017-04-22 19:27:41 +01:00
paboyle abba44a837 Hand unrolled for overlapped comms 2017-04-22 17:45:17 +01:00
paboyle f301be94ce Fixed 2017-04-22 17:42:31 +01:00
Peter Boyle 1d1b225497 Hand unrolled Nc=3 kernels support split phase compute (on-node, off-node). 2017-04-22 09:05:28 -04:00
Peter Boyle 53a785a3dd Fixing the KNL compile 2017-04-22 08:11:51 -04:00
paboyle 736bf3c866 Major rework of stencil. Half precision and MPI3 now working. 2017-04-22 11:33:50 +01:00
paboyle b9bbe5d188 L1p config bg/q 2017-04-22 11:33:09 +01:00
paboyle 3844bcf800 If no f16c instructions supported must use software half precision conversion.
This will also become useful on BG/Q, so will move out from SSE4 into a general area.
Lifted the Eigen half precision from web. Looks sensible, but not extensively regressed
against the intrinsics implementation yet.
2017-04-20 15:30:52 +01:00
paboyle e1a2319d01 Simple compressor moved out of cshift into stencil 2017-04-20 13:18:15 +01:00
paboyle 180c732b4c Move compressors out of Cshift.
Slice iterators would help
2017-04-20 13:17:55 +01:00
paboyle 957a706d0b Useful script 2017-04-20 13:17:44 +01:00
paboyle d2312e9874 Drop compressor entirely from Cshift to only Stencil. 2017-04-20 13:16:55 +01:00
paboyle fc4ab9ccd5 Working half precision comms 2017-04-20 11:20:26 +01:00
paboyle 4a340aa5ca Massive compressor rework to support reduced precision comms 2017-04-20 09:28:27 +01:00
paboyle 3b7de792d5 Type comparison in the traits work 2017-04-18 13:28:04 +01:00
paboyle 557c3fa109 Pretty change 2017-04-18 13:27:38 +01:00
paboyle ec18e9f7f6 Merge branch 'develop' into feature/half-prec-comms 2017-04-18 11:39:39 +01:00
paboyle a839d5bc55 Updated todo list 2017-04-18 11:22:17 +01:00
paboyle de41b84c5c Merge branch 'feature/normHP' into develop 2017-04-18 10:57:21 +01:00
paboyle 8e161152e4 MultiRHS solver improvements with slice operations moved into lattice and sped up.
Block solver requires a lot of performance work.
2017-04-18 10:51:55 +01:00
paboyle 3141ebac10 MultiRHS working, starting to optimise. Block doesn't and I thought it already was; puzzled. 2017-04-17 10:50:19 +01:00
paboyle 7ede696126 Non compile of tests fixed 2017-04-16 23:40:00 +01:00
paboyle bf516c3b81 higher precision reduction variables in norm and inner product 2017-04-15 12:27:28 +01:00
paboyle 441a52ee5d First cut at higher precision reduction 2017-04-15 10:57:21 +01:00
paboyle a8db024c92 Cleaning up the dense matrix and lanczos sector 2017-04-15 08:54:11 +01:00
paboyle a9c22d5f43 Verbose removal 2017-04-14 14:38:49 +01:00
paboyle 3ca41458a3 Fix to no USE_FP16 case 2017-04-14 14:20:54 +01:00
paboyle 9e2d29c644 USE_FP16 macro 2017-04-14 14:17:14 +01:00
Peter Boyle 951be75292 Half precision conversion working on AVX512 now too 2017-04-13 17:35:11 +01:00
Peter Boyle b9113ed310 Patches for knl 2017-04-13 12:02:12 -04:00
portelli 1407418755 Old qed-fvol program build disabled 2017-04-13 15:32:30 +01:00
portelli a6a0da873f Merge branch 'feature/hadrons' into feature/qed-fvol 2017-04-13 15:31:06 +01:00
paboyle 42fb49d3fd Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-04-13 14:12:47 +01:00
paboyle 2a54c9aaab Merge branch 'feature/block-cg' into develop 2017-04-13 14:12:24 +01:00
paboyle 0957378679 Fixing conditional ugly way 2017-04-13 13:47:56 +01:00
paboyle 2ed6c76fc5 Getting multiline if then fi working 2017-04-13 13:43:13 +01:00
paboyle d3b9a7fa14 F16c apparently requires AVX, even if the 128 bit are used.
Seems odd.
2017-04-13 13:19:11 +01:00
paboyle 75ea306ce9 Another try at travis 2017-04-13 13:05:32 +01:00
paboyle 4226c633c4 Default to FP16 off again 2017-04-13 12:51:39 +01:00
paboyle 5a4eafbf7e .travis 2017-04-13 12:50:43 +01:00
paboyle eb8e26018b Travis update for macos 2017-04-13 12:35:11 +01:00
paboyle db5ea001a3 Update to use Xcode 8.3 since -mfp16 causes SIGILL 2017-04-13 12:22:40 +01:00
paboyle 2846f079e5 Predicate tests on fp16 being enabled 2017-04-13 12:08:05 +01:00
paboyle 1d502e4ed6 FP16 optional compile time 2017-04-13 11:55:24 +01:00
paboyle 73cdf0fffe Drop f16c from SSE because of a macos compile error on travis 2017-04-13 11:23:41 +01:00
paboyle 1c25773319 Trap illegal instructions 2017-04-13 10:51:40 +01:00
paboyle c38400b26f Trap signals 2017-04-13 10:35:20 +01:00
paboyle 9c3065b860 Debug flags off again 2017-04-13 10:01:32 +01:00
paboyle 94eb829d08 Align cast fixed for __mm128i gcc complained 2017-04-13 08:40:44 +01:00
paboyle 68392ddb5b Exchange in generic
Precision change in AVX, SSE, AVX512, Generic. QPX still to do.
2017-04-13 08:38:12 +01:00
paboyle cb6b81ae82 Half precision conversion 2017-04-12 19:32:37 +01:00
portelli 90ec6eda0c Rare K test solver name fix 2017-04-10 17:48:58 +01:00
portelli fe8d625694 Merge commit '5e477ec553aa48d7d19b5a7c45d41acbb3392bcb' into feature/rare_kaon 2017-04-10 17:23:37 +01:00
portelli 53e76b41d2 Merge branch 'develop' into feature/hadrons 2017-04-10 17:00:53 +01:00
portelli 8ef4300412 spurious .dirstamp files removed 2017-04-10 17:00:22 +01:00
portelli 98a24ebf31 The macro “magics” is very intensive for the preprocessor in the measurement code which has numerous serialisable classes. Reducing the number of serialisable fields to 64 (instead of 1024) helps a lot, this is enough for now and can be extended trivially if needed in the future. 2017-04-10 16:58:54 +01:00
paboyle b12dc89d26 Commenting and clean up 2017-04-10 20:38:20 +09:00
paboyle d80d802f9d MultiRHS solver test 2017-04-10 00:12:12 +09:00
paboyle 3d99b09dba Start of blockCG 2017-04-09 23:42:10 +09:00
paboyle db5f6d3ae3 Verbose fix 2017-04-09 23:41:30 +09:00
paboyle 683550f116 Const args improvement 2017-04-09 23:41:04 +09:00
Lanny91 5e477ec553 Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/rare_kaon 2017-04-07 11:51:09 +01:00
paboyle 55d0329624 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2017-04-07 11:08:14 +09:00
paboyle 86aaa35294 Christoph needs SchurDiagTwoKappa which is mobius specific. 2017-04-07 11:07:40 +09:00
Guido Cossu 172d3dc93a Correcting names in tests 2017-04-05 16:24:04 +01:00
Guido Cossu 7b03d8d087 Fixing the remaining merge conflicts 2017-04-05 16:17:46 +01:00
Guido Cossu 4b759b8f2a Merge branch 'feature/hmc_generalise' into feature/scalar_adjointFT 2017-04-05 14:50:28 +01:00
Guido Cossu 8c540333d5 Merge branch 'develop' into feature/hmc_generalise 2017-04-05 14:41:04 +01:00
paboyle 5592f7b8c1 Creation mode better implementation 2017-04-05 02:35:34 +09:00
paboyle 35da4ece0b UID fix 2017-04-05 02:18:15 +09:00
paboyle 061b15b9e9 Merge branch 'feature/sitmo-skipahead' into develop 2017-04-05 01:24:49 +09:00
portelli ff4e54ef80 Merge branch 'develop' into feature/hadrons 2017-04-03 18:56:21 +01:00
paboyle 561426f6eb Clean up 2017-04-02 23:13:48 +09:00
paboyle 83f6fab8fa Big/Small crush test, and fast SITMO rng init, faster but not ideal
MT and Ranlux init.
2017-04-02 12:10:51 +09:00
paboyle 0fade84ab2 No random device 2017-04-02 00:29:40 +09:00
paboyle 9dc7ca4c3b Sitmo fast init 2017-04-02 00:28:22 +09:00
paboyle 935d82f5b1 sanity checks 2017-04-02 00:27:28 +09:00
paboyle 9cbcdd65d7 No random device seed 2017-04-02 00:26:57 +09:00
paboyle f18f5ed926 Drop random device 2017-04-02 00:26:26 +09:00
paboyle d1d63a4f2d sitmo default 2017-04-02 00:26:05 +09:00
paboyle 7e5faa0f34 Multiple RNGs 2017-04-02 00:25:44 +09:00
paboyle 6af459cae4 Christoph's coefficients. 2017-03-31 17:07:43 +09:00
paboyle 1c4bc7ed38 Debugged staggered conventions 2017-03-31 14:41:48 +09:00
Lanny91 cd1bd921bd Reduced code duplication for Weak Hamiltonian contraction modules 2017-03-30 18:02:14 +01:00
Lanny91 fff5751b1a HADRONS: Updated rare kaon test program, including all contractions. Sink smearing still to be implemented. 2017-03-30 10:57:01 +01:00
Lanny91 2c81696fdd HADRONS: 4pt Weak + current disconnected topology (e.g. for rare neutral kaon decays) 2017-03-30 10:37:17 +01:00
Lanny91 c9dc22efa1 HADRONS: Standalone disconnected loop contraction. 2017-03-30 10:33:18 +01:00
Lanny91 0ab04a000f HADRONS: 3pt contraction with gamma insertion between two propagators. 2017-03-30 10:30:58 +01:00
paboyle 93ea5d9468 Pretty code 2017-03-30 15:00:03 +09:00
paboyle 1ec5d32369 Chulwoo's test to zmobius helped me shake out 2017-03-30 13:45:13 +09:00
paboyle 9fd23faadf Pretty layout 2017-03-30 13:44:45 +09:00
paboyle 10e4fa0dc8 Template instantiation improvements 2017-03-30 13:44:25 +09:00
paboyle c4aca1dde4 Conjugate coefficients on adjoint 2017-03-30 13:44:05 +09:00
paboyle b9e8ea3aaa conjugate coefficient on the dagger 2017-03-30 13:43:13 +09:00
paboyle 077aa728b9 Fix the ZMobius (I think) 2017-03-30 13:42:09 +09:00
paboyle a8d83d886e Macro controls 2017-03-30 13:31:34 +09:00
paboyle 7fd46eeec4 Trailing whitespace removal 2017-03-30 13:31:10 +09:00
paboyle e0c4eeb3ec Compiles again 2017-03-30 13:30:45 +09:00
paboyle cb9a297a0a Chulwoo's Zmobius test 2017-03-30 13:30:25 +09:00
paboyle 2b115929dc Small AVX512 asm ifdef patch 2017-03-29 18:51:23 +09:00
paboyle 5c6571dab1 Merge branch 'feature/bgq-asm' into develop 2017-03-29 18:48:55 +09:00
paboyle 417ec56cca Release candidate 2017-03-29 05:45:33 -04:00
paboyle 756bc25008 Verbose header print by default 2017-03-29 04:44:17 -04:00
paboyle 35695ba57a Bug fix in MPI3 2017-03-29 04:43:55 -04:00
paboyle 81ead48850 Log any errors to a file 2017-03-29 04:39:52 -04:00
paboyle d805867e02 Better init 2017-03-28 13:25:05 -04:00
paboyle e55a751e23 Merge branch 'feature/bgq-asm' of https://github.com/paboyle/Grid into feature/bgq-asm 2017-03-28 12:20:12 -04:00
paboyle 358eb75995 Shorten loop 2017-03-28 12:20:02 -04:00
paboyle 98f9318279 Build on AVX2 and MPI passing with clang++ 2017-03-28 23:16:04 +09:00
paboyle 4b17e8eba8 Merge branch 'develop' into feature/bgq-asm
Conflicts:
	lib/qcd/action/fermion/Fermion.h
	lib/qcd/action/fermion/WilsonFermion.cc
	lib/util/Init.cc
	tests/Test_cayley_even_odd_vec.cc
2017-03-28 04:49:30 -04:00
paboyle 75112a632a IO improvements to fail on IO error 2017-03-28 02:28:04 -04:00
paboyle 18bde08d1b Merge branch 'feature/staggering' into develop 2017-03-28 15:25:55 +09:00
paboyle d45cd7e677 Adding a simple read of NERSC test 2017-03-26 09:24:26 -04:00
paboyle 4e96679797 Added a bnl log 2017-03-25 09:25:46 -04:00
Guido Cossu 4c1ea8677e Small cosmetic changes and vscode gitignore 2017-03-23 14:09:35 +09:00
paboyle fc93f0b2ec Save some code for static huge tlb's. It is ifdef'ed out but an interesting root only experiment.
No gain from it.
2017-03-21 22:30:29 -04:00
paboyle 8c8473998d Average over whole cluster the comm time. 2017-03-21 22:29:51 -04:00
Guido Cossu 120fb59978 Adding tests for WilsonFlow classes 2017-03-21 16:11:35 +09:00
Guido Cossu fd56b3ff38 Merge branch 'develop' into feature/hmc_generalise 2017-03-21 13:33:41 +09:00
Guido Cossu 0ec6829edc Fixing compilation errors for the WilsonFlow 2017-03-21 13:06:32 +09:00
Guido Cossu 18b7845b7b Adding WilsonFlow smearing 2017-03-21 11:52:05 +09:00
Guido Cossu 3d0fe15374 Added topological charge measurement 2017-03-17 16:14:57 +09:00
Guido Cossu 91886068fe Fixed seg fault for observable modules 2017-03-17 13:59:31 +09:00
Guido Cossu 6d1e9e5f92 Small cleanup of the observables 2017-03-17 11:42:55 +09:00
Guido Cossu b640230b1e Moving hmc observables in a different directory 2017-03-17 11:40:17 +09:00
paboyle e7c36771ed ZMobius prep for asm 2017-03-15 14:23:33 -04:00
Guido Cossu 038b6ee9cd Fixing JSON compilation error 2017-03-16 01:09:24 +09:00
Guido Cossu 38806343a8 Improving efficiency of the force term 2017-03-15 15:16:16 +09:00
Guido Cossu 831ca4e3bf Added Scalar action for fields in the adjoint representation 2017-03-14 14:55:18 +09:00
paboyle 8dc57a1e25 Layout change 2017-03-13 11:11:46 +00:00
paboyle f57bd770b0 Merge branch 'bugfix/dminus' into feature/bgq-asm 2017-03-13 11:11:03 +00:00
paboyle 4ed10a3d06 Merge branch 'develop' into feature/bgq-asm 2017-03-13 11:10:10 +00:00
Peter Boyle dfefc70b57 Merge pull request #93 from Lanny91/hotfix/qpx
Some fixes for QPX and generic SIMD types.
2017-03-13 09:31:26 +00:00
paboyle b64e004555 MPI run fail on macos 2017-03-13 01:59:01 +00:00
paboyle 447c5e6cd7 Z mobius hermiticity correction 2017-03-13 01:30:43 +00:00
paboyle 8b99d80d8c Merge branch 'bgq-asm-shmemfixes' into feature/bgq-asm 2017-03-12 23:30:09 +00:00
Guido Cossu b3dede4dd3 Merge branch 'develop' into feature/hmc_generalise 2017-03-10 23:57:37 +09:00
Guido Cossu 4e34132f4d Correcting modules use in test files 2017-03-10 23:54:53 +09:00
Guido Cossu c07cb10247 Merge branch 'feature/hmc_generalise' of https://github.com/paboyle/Grid into feature/hmc_generalise 2017-03-10 22:37:25 +09:00
Guido Cossu d7767a2a62 Few more tests 2017-03-10 22:33:48 +09:00
Guido Cossu ec035983fd Fixing the implicit integration 2017-03-01 11:56:35 +00:00
paboyle 3901b17ade timeings from BNL 2017-02-28 17:06:45 -05:00
paboyle af230a1fb8 Average the time across the whole machine for outliers 2017-02-28 17:05:22 -05:00
Christopher Kelly 06a132e3f9 Fixes to SHMEM comms 2017-02-28 13:31:54 -08:00
Guido Cossu 596dcd85b2 Auxiliary fields 2017-02-27 13:16:38 +00:00
paboyle 96d44d5c55 Header fix 2017-02-24 19:12:11 -05:00
Guido Cossu 7270c6a150 Integrator works now 2017-02-24 17:03:42 +00:00
Lanny91 7fe797daf8 SIMD vector length sanity checks 2017-02-23 16:49:44 +00:00
Lanny91 486a01294a Corrected QPX SIMD width 2017-02-23 16:47:56 +00:00
paboyle 586a7c90b7 Merge branch 'develop' into feature/bgq-asm 2017-02-23 00:26:59 +00:00
paboyle e099dcdae7 Merge branch 'develop' into feature/bgq-asm 2017-02-23 00:25:29 +00:00
paboyle 4e7ab3166f Refactoring header layout 2017-02-22 18:09:33 +00:00
paboyle aac80cbb44 Bug fix from Chris K 2017-02-22 12:19:09 -05:00
Lanny91 c80948411b Added tRotate function and MaddRealPart struct for generic SIMD, bugfix in MultRealPart and minor cosmetic changes. 2017-02-22 14:57:10 +00:00
Lanny91 95625a7bd1 Use Grid Integer type 2017-02-22 13:09:32 +00:00
Lanny91 0796696733 Emulated integer vector type for QPX and generic SIMD instruction sets. 2017-02-22 12:01:36 +00:00
Peter Boyle f8b9ad7d50 Merge pull request #91 from sunpho84/public_modules_memebers
making public same serializable parameters in HMC Module
2017-02-22 00:53:20 +00:00
Peter Boyle 04a1959895 Merge pull request #90 from sunpho84/liming
adding --with switch to pass lime path
2017-02-22 00:52:53 +00:00
azusayamaguchi 1c30e9a961 Verified 2017-02-21 23:01:25 +00:00
Francesco Sanfilippo 93cc270016 making public same serializable parameters in HMC Module
RNGModuleParameters
GridModuleParameters
2017-02-21 23:11:56 +01:00
Francesco Sanfilippo 29b60f7e1a adding --with switch to pass lime path 2017-02-21 23:09:39 +01:00
azusayamaguchi bf7e3f20d4 Staggaered fermion optimised version 2017-02-21 14:35:42 +00:00
Guido Cossu 902afcfbaf Adding metric and the implicit steps 2017-02-21 11:30:57 +00:00
paboyle 3ae92fa2e6 Global changes to parallel_for structure.
Move the comms flags to more sensible names
2017-02-21 05:24:27 -05:00
paboyle 3906cd2149 Stencil fix on BNL KNL system 2017-02-20 17:51:31 -05:00
paboyle 5a1fb29db7 Useful debug code info to preserve 2017-02-20 17:49:23 -05:00
paboyle 661fc4d3d1 Debug AVX512 exchange code paths 2017-02-20 17:48:36 -05:00
paboyle 41009cc142 Move excange into the stencil only; keep Cshift fully general 2017-02-20 17:48:04 -05:00
paboyle 37720c4db7 Count bytes off node only 2017-02-20 17:47:40 -05:00
paboyle 1a30455a10 1000 iters on bmark for more accurate timing 2017-02-20 17:47:01 -05:00
Guido Cossu 97a6b61551 Covariant laplacian and implicit integration 2017-02-20 11:17:27 +00:00
paboyle cd0da81196 Merge branch 'feature/bgq-asm' of https://github.com/paboyle/Grid into feature/bgq-asm 2017-02-16 18:52:30 -05:00
paboyle f246fe3304 Improvements to avx for invertible to avoid latent bug 2017-02-16 23:52:44 +00:00
paboyle 8a29c16bde Faster gather exchange 2017-02-16 23:52:22 +00:00
paboyle d68907fc3e Debug temp 2017-02-16 18:51:35 -05:00
paboyle 5c0adf7bf2 Make clang happy with parenthesis 2017-02-16 23:51:33 +00:00
paboyle be3a8249c6 Faster gather 2017-02-16 23:51:15 +00:00
paboyle bd600702cf Vectorise the XYZT face gathering better.
Hard coded for simd_layout <= 2 in any given spread out direction; full generality is inconsistent
with efficiency.
2017-02-15 11:11:04 +00:00
Lanny91 f011bdb869 Fixed overwrite of pminus projection in construction of 4d propagator from 5d. 2017-02-14 14:07:17 +00:00
Guido Cossu bafb101e4f Testing different versions of the Laplacian 2017-02-13 15:38:11 +00:00
Guido Cossu 08fdf05528 Added and tested the covariant laplacian + CG solver 2017-02-13 15:05:01 +00:00
paboyle aca7a3ef0a Optimisation control improvements 2017-02-10 18:22:31 -05:00
Guido Cossu 9e72a6b22e Reverting to Xcode 7.3 2017-02-10 12:57:03 +00:00
Guido Cossu 1c12c5612c Xcode 8.2 for travis 2017-02-10 12:12:01 +00:00
Guido Cossu a8193c4bcb Correcting travis compilation on gcc 2017-02-10 10:59:30 +00:00
Guido Cossu c3d7ec65fa All tests compile. 2017-02-10 10:27:51 +00:00
Guido Cossu 8b6a6c8236 Resolving small merge conflict 2017-02-09 16:20:24 +00:00
Guido Cossu e0571c872b Merge branch 'develop' into feature/hmc_generalise 2017-02-09 16:12:00 +00:00
Guido Cossu c67f41887b Reverting parameters to original 2017-02-09 15:59:56 +00:00
Guido Cossu 84687ccf1f Handling an Intel compiler warning for Json class 2017-02-09 15:33:33 +00:00
Guido Cossu 3274561cf8 Cleanup 2017-02-09 15:18:38 +00:00
portelli e08fbb3771 Merge pull request #84 from Lanny91/feature/rare_kaon
Rare Kaon decay contraction code
2017-02-08 08:23:34 -08:00
Lanny91 d7464aa0fe Switched from XmlWriter to CorrWriter in contraction code 2017-02-08 16:13:44 +00:00
Lanny91 00d29153f0 Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/rare_kaon 2017-02-08 16:11:15 +00:00
portelli 2ce989f220 Hadrons: default I/O to HDF5 2017-02-08 07:50:05 -08:00
Lanny91 d7a1dc85be Revert "Hadrons: test for rare kaon contraction code."
This reverts commit 1e257a1251.
2017-02-08 13:23:05 +00:00
Lanny91 fc19503673 Removed MSink namespace. 2017-02-08 13:17:39 +00:00
Lanny91 beba824136 Make use of GammaL class in Weak Hamiltonian contractions 2017-02-08 12:45:39 +00:00
Lanny91 6ebf8b12b6 Removed unnecessary repeat of write in Weak Hamiltonian contractions 2017-02-08 12:43:13 +00:00
Lanny91 e5a7ed4362 Moved write outside of loop, some physics corrections 2017-02-08 12:29:33 +00:00
Lanny91 b9f7ea47c3 Access hasModule function directly from Environment instance. 2017-02-08 10:10:06 +00:00
Lanny91 06f7ee202e Revert "Add function to say whether or not a module exists in application class"
This reverts commit 522f6bf91a.
2017-02-08 10:08:18 +00:00
Lanny91 2b2fc6453f Fixed single precision compatibility issues 2017-02-07 13:59:29 +00:00
Lanny91 bdd2765461 Added missing allocation of Weak Hamiltonian result vector 2017-02-07 13:06:42 +00:00
paboyle 2c246551d0 Overlap comms and compute options in wilson kernels 2017-02-07 01:37:10 -05:00
paboyle 71ac2e7940 Faster RNG init 2017-02-07 01:33:23 -05:00
paboyle 2bf4688e83 Running on BNL KNL 2017-02-07 01:32:10 -05:00
paboyle a48ee6f0f2 Don't use MPI3_leader any more. No real gain and complex 2017-02-07 01:31:24 -05:00
paboyle 73547cca66 MPI3 working i think 2017-02-07 01:30:02 -05:00
paboyle 123c673db7 Policy to control async or sync SendRecv 2017-02-07 01:24:54 -05:00
paboyle 61f82216e2 Communicator Policy, NodeCount distinct from Rank count 2017-02-07 01:22:53 -05:00
paboyle 8e7ca92278 Debugged cshift case 2017-02-07 01:21:32 -05:00
paboyle 485ad6fde0 Stencil working in SHM MPI3 2017-02-07 01:20:39 -05:00
paboyle 6ea2184e18 OMP define change 2017-02-07 01:17:16 -05:00
paboyle fdc170b8a3 Parallel fors in lattice transfer 2017-02-07 01:16:39 -05:00
paboyle 060da786e9 Comms benchmark improvements 2017-02-07 01:07:39 -05:00
paboyle 85c7bc4321 Bug fixes for cases that physics code couldn't hit but latent
and discovered on KNL (long vector, y SIMD dir) and checker dir set to y.
Remove the assertions on these code paths now they are tested.
2017-02-07 01:01:15 -05:00
paboyle 0883d6a7ce Overlap comms compute support; make reg naming consistent with bgq aasm 2017-02-07 00:59:32 -05:00
paboyle 9ff97b4711 Improved stencil tests passing all on KNL multinode 2017-02-07 00:58:34 -05:00
paboyle b5e9c900a4 Better printing and signal handling options 2017-02-07 00:57:55 -05:00
paboyle 4bbdfb434c Overlap comms compute modifications 2017-02-07 00:57:01 -05:00
Lanny91 4a45c06dd7 Code cleaning and addition of Weak Hamiltonian contraction log message 2017-02-06 20:12:30 +00:00
Lanny91 d6a7d7d1e0 Hadrons: added missing momentum parameter in rare kaon contraction test 2017-02-06 18:15:49 +00:00
Lanny91 1a122a0dd8 Hadrons: corrected gamma matrix inputs in rare kaon test 2017-02-06 17:35:41 +00:00
Lanny91 20e20733e8 Merge branch 'feature/hadrons' into feature/rare_kaon 2017-02-06 14:12:21 +00:00
Lanny91 b7cd1a19e3 Utilities for reading and writing "pair" objects. 2017-02-06 14:08:59 +00:00
Lanny91 f510002a62 Merge remote-tracking branch 'paboyle/feature/hadrons' into feature/hadrons 2017-02-03 14:37:34 +00:00
portelli eedcaf6470 Merge branch 'feature/hadrons' into feature/qed-fvol 2017-02-01 15:53:10 -08:00
Lanny91 1e257a1251 Hadrons: test for rare kaon contraction code. 2017-02-01 16:36:40 +00:00
Lanny91 522f6bf91a Add function to say whether or not a module exists in application class 2017-02-01 16:36:08 +00:00
Lanny91 d35d87d2c2 Weak Hamiltonian Eye-type contraction execution 2017-02-01 16:33:24 +00:00
Lanny91 74a5cda84b Removed unnecessary "3pt" labels 2017-02-01 15:03:49 +00:00
Lanny91 5be05d85b8 Fixed collision of Wall source and sink header ifdefs 2017-02-01 13:56:22 +00:00
Lanny91 35ac85aea8 Updated Weak Hamiltonian contractions to use zero-flop gamma matrices 2017-02-01 12:57:34 +00:00
Lanny91 fa237401ff Consistent variable name in macro 2017-02-01 12:56:55 +00:00
Lanny91 97053adcb5 Merge branch 'feature/hadrons' into feature/rare_kaon 2017-02-01 10:13:29 +00:00
Lanny91 f8fbe4d7a3 Merge remote-tracking branch 'paboyle/feature/hadrons' into feature/hadrons
# Conflicts:
#	extras/Hadrons/Modules/MContraction/Meson.hpp
#	tests/hadrons/Test_hadrons_meson_3pt.cc

Updated Meson.hpp to utilise zero-flop gamma matrices.
2017-02-01 09:27:00 +00:00
Lanny91 ef31c012bf Merge remote-tracking branch 'paboyle/develop' into feature/hadrons 2017-01-31 17:36:10 +00:00
Lanny91 9e9f621d5d Hadrons: added Weak Hamiltonian module dependencies, some reformatting. 2017-01-30 17:54:21 +00:00
Lanny91 651e1a7cbc Hadrons: Momentum inserted as multiples of 2*pi/L 2017-01-30 17:14:33 +00:00
Lanny91 c4d3672720 Hadrons: Momentum projection in meson module. 2017-01-30 17:09:04 +00:00
Guido Cossu 16be6d378c Now action factory support different Fields (templated) 2017-01-30 14:22:41 +00:00
Guido Cossu f05d0565aa Adding ScalarField theory 2017-01-30 10:59:28 +00:00
portelli b39f0d1fb6 Hadrons: default I/O to HDF5 if possible, XML otherwise 2017-01-27 18:12:35 -08:00
portelli 9f1267dfe6 Merge branch 'feature/qed-fvol' of github.com:paboyle/Grid into feature/qed-fvol 2017-01-27 17:06:34 -08:00
portelli 2e90285232 Merge pull request #80 from jch1g10/feature/qed-fvol
ChargedProp: remove ScalarField fs
2017-01-27 17:06:13 -08:00
portelli e254de982e Merge branch 'feature/qed-fvol' of github.com:paboyle/Grid into feature/qed-fvol 2017-01-27 17:02:35 -08:00
portelli 28d99b5297 Merge branch 'develop' into feature/qed-fvol 2017-01-27 16:59:53 -08:00
Lanny91 9bf4108d1f Weak Hamiltonian contraction modules, for Eye and NonEye contraction topologies. Execution for NonEye type diagrams has been implemented, but not yet for Eye type. 2017-01-27 16:58:11 +00:00
James Harrison ee93f0218b ChargedProp: remove ScalarField fs 2017-01-27 12:22:48 +00:00
Guido Cossu 6929a84c70 Reformatting files 2017-01-27 11:54:44 +00:00
Guido Cossu 5c779a789b Moving registrations in an independent file 2017-01-27 11:23:51 +00:00
portelli 161ed102a5 Merge pull request #79 from jch1g10/feature/qed-fvol
Fixed bug in ChargedProp
2017-01-26 19:49:14 -08:00
Guido Cossu e863a948e3 Cleaning up files and directories 2017-01-26 15:24:49 +00:00
James Harrison f65a585236 ChargedProp: Switch to HDF5 output 2017-01-26 15:02:30 +00:00
Lanny91 977f34dca6 Added missing typename 2017-01-26 13:18:33 +00:00
Lanny91 90ad956340 Merge branch 'develop' of https://github.com/paboyle/Grid into feature/rare_kaon 2017-01-26 12:08:41 +00:00
Guido Cossu 7996f06335 Commented out registrations.
Move to an independent file that is linked only for the factory managed HMC
2017-01-25 18:27:45 +00:00
Guido Cossu 7b40a3e3e5 Reorganizing files 2017-01-25 18:09:46 +00:00
Guido Cossu f7fbbaaca3 Compiles after merging 2017-01-25 12:11:58 +00:00
Guido Cossu 17629b8d9e Merge branch 'develop' into feature/hmc_generalise 2017-01-25 11:33:53 +00:00
Guido Cossu 0baa20d292 Againg fixing compilation on Travis, no LIME lib present 2017-01-25 11:18:44 +00:00
Guido Cossu 4571c918a4 Fixing compilation error when compiling without LIME 2017-01-25 11:14:43 +00:00
Guido Cossu 5251ea4d30 Adding more fermion action modules, generalised DWF 2017-01-25 11:10:44 +00:00
Guido Cossu 7f456b4173 👷 Added all pseudofermion actions to the serialiser 2017-01-24 13:57:32 +00:00
James Harrison ae99e99da2 Fixed bug in ChargedProp 2017-01-23 17:27:50 +00:00
Lanny91 c291ef77b5 Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/hadrons 2017-01-23 15:24:47 +00:00
Lanny91 7dd2764bb2 Wall sink smearing 2017-01-23 15:17:54 +00:00
Guido Cossu 244f8fb6dc Added JSON parser (without NextElement) 2017-01-23 14:57:38 +00:00
azusayamaguchi 05c1924819 Timing loop change 2017-01-23 10:43:45 +00:00
portelli f3ca29af6c Merge branch 'feature/hadrons' into feature/qed-fvol 2017-01-21 13:41:05 -08:00
portelli 37988221a8 Merge branch 'feature/serialisation-hdf5' into feature/qed-fvol 2017-01-20 14:04:20 -08:00
Guido Cossu 27dfe816fa Added TwoFlavorsEO
Had to remove a conformability check in the Derivative of SchurDiff,
see the comments in the file
2017-01-20 16:59:31 +00:00
Lanny91 af29be2c90 Simplified operation of meson module. Result has been modified to output one contraction at a time for each pair of gamma insertions at source and sink. 2017-01-20 16:38:50 +00:00
Guido Cossu f96fac0aee All functionalities ready.
Todo: add all the fermion action modules
2017-01-20 12:56:20 +00:00
portelli 7a327a3f28 Merge branch 'develop' into feature/qed-fvol 2017-01-19 14:22:36 -08:00
Lanny91 07f2ebea1b Meson module now takes list of gamma matrices to insert at source and sink. 2017-01-19 22:18:42 +00:00
Guido Cossu 851f2ad8ef Adding fermions actions support in the factories 2017-01-19 10:00:02 +00:00
Guido Cossu 23e0561dd6 Added all required functionalities, time for cleaning
All actions to be added
2017-01-18 16:31:51 +00:00
Lanny91 8ae1a95ec6 Legal banners and module descriptions 2017-01-17 18:14:20 +00:00
Lanny91 82b7d4eaf0 Added noise loop dependencies 2017-01-17 15:58:32 +00:00
Lanny91 78774fbdc0 Construct loop propagator 2017-01-17 15:29:45 +00:00
Guido Cossu 924130833e Moved more parameters to serialization 2017-01-17 13:22:18 +00:00
Guido Cossu 0157274762 HMC factories 2017-01-17 10:46:49 +00:00
Guido Cossu 87e8aad5a0 Added support for input file HMC modules (missing the actions yet) 2017-01-16 16:07:12 +00:00
Guido Cossu c6f59c2933 Adding factories 2017-01-16 10:18:09 +00:00
Lanny91 b7f90aa011 Added momentum choice for wall source 2017-01-13 15:54:19 +00:00
portelli 92f8950a56 Charged scalar prop: cleaning and output 2017-01-13 13:30:56 +00:00
portelli 65987a8a58 First implementation of the scalar QED propagator, runs but absolutely not checked 2017-01-12 20:44:23 +00:00
portelli 889d828bc2 Code cleaning 2017-01-12 18:17:44 +00:00
Lanny91 f22b79da8f Added missing type aliases 2017-01-12 12:52:12 +00:00
Lanny91 3855673ebf Added header for wall source 2017-01-12 11:42:37 +00:00
Lanny91 4db82da0db Wall sources 2017-01-12 11:41:10 +00:00
Lanny91 0cdc3d2fa5 Merge remote-tracking branch 'refs/remotes/paboyle/feature/hadrons' into feature/hadrons 2017-01-12 11:26:55 +00:00
portelli ad98b6193d creating the necessary caches for the FFT EM scalar propagator 2017-01-11 18:40:43 +00:00
portelli fc760016b3 More uniform cache name for scalar momentum propagators 2017-01-11 18:39:58 +00:00
portelli 2da86f7dae Merge branch 'feature/hadrons' into feature/qed-fvol 2017-01-11 18:38:05 +00:00
Guido Cossu 0dfda4bb90 Working on the RNGModule 2017-01-09 11:06:18 +00:00
Guido Cossu 1189ebc8b5 Cleaning up the checkpointers interface 2017-01-05 15:52:52 +00:00
portelli 97843e2b58 Hadrons: free scalar buffer fix and output 2017-01-05 14:58:55 +00:00
portelli 82b3f54697 scalar free propagator fix 2017-01-05 14:58:07 +00:00
Guido Cossu 1bb8578173 Added module for checkpointers 2017-01-05 13:09:32 +00:00
Peter Boyle c3b6d573b9 Merge branch 'feature/bgq-asm' of https://github.com/paboyle/Grid into feature/bgq-asm 2016-12-30 22:42:17 +00:00
portelli 673994b281 Hadrons: modules for scalar propagators 2016-12-29 22:44:58 +01:00
portelli bbc0eff078 Hadrons: scalar sources 2016-12-29 22:44:22 +01:00
portelli 4c60e31070 Hadrons: code cleaning 2016-12-29 22:44:08 +01:00
portelli afbf7d4c37 QED Gimpl moved in Photon.h 2016-12-29 22:43:38 +01:00
portelli 8c3cc32364 Scalar action 2016-12-29 22:42:58 +01:00
Peter Boyle 1e179c903d Worried about integer; suspect where statements are broken 2016-12-27 17:46:38 +00:00
Peter Boyle 669cfca9b7 No inline 2016-12-27 17:45:40 +00:00
Peter Boyle ff2f559a57 Remove inline on gather optimised path 2016-12-27 17:45:19 +00:00
Peter Boyle 03c81bd902 Merge branch 'feature/bgq-asm' of https://github.com/paboyle/Grid into feature/bgq-asm 2016-12-27 11:25:35 +00:00
Peter Boyle a869addef1 Stats switch off 2016-12-27 11:25:22 +00:00
Peter Boyle 1caa3fbc2d LOCK UNLOCK only 2016-12-27 11:24:45 +00:00
Peter Boyle 3d21297bbb Call the fast path compressor for wilson kernels to avoid if else on projector 2016-12-27 11:23:13 +00:00
Peter Boyle 25efefc5b4 Back to original thread policy post test 2016-12-23 09:49:04 +00:00
Peter Boyle eabf316ed9 BGQ performance ASM 2016-12-22 21:56:08 +00:00
Peter Boyle 04ae7929a3 BGQ or KNL assembler now 2016-12-22 17:53:22 +00:00
Peter Boyle caba0d42a5 L1p controls 2016-12-22 17:52:55 +00:00
Peter Boyle 9ae81c06d2 L1p controls for BG/Q 2016-12-22 17:52:21 +00:00
Peter Boyle 0903c48caa Hot start SU3 2016-12-22 17:51:45 +00:00
Peter Boyle 7dc36628a1 QPX finishing 2016-12-22 17:50:48 +00:00
Peter Boyle b8cdb3e90a Debug hack; raises from 62GF/s to 72 GF/s per node on BG/Q 2016-12-22 17:50:14 +00:00
Peter Boyle 5241245534 Default to static scheduling 2016-12-22 17:49:21 +00:00
Dr Peter Boyle 960316e207 type conversion in printf 2016-12-22 17:27:01 +00:00
Guido Cossu 5214846341 Adding a resource manager 2016-12-22 12:41:56 +00:00
portelli 4c3fd9fa3f stochastic QED field module in Hadrons 2016-12-22 00:29:41 +01:00
portelli 17b3a10d46 stochastic QED: function to cache 1/sqrt(khat^2) 2016-12-22 00:29:19 +01:00
portelli 149a46b92c Merge branch 'feature/hadrons' into feature/qed-fvol 2016-12-22 00:26:43 +01:00
Guido Cossu ce1a115e0b Removing redundant arguments for integrator functions, step 1 2016-12-20 17:51:30 +00:00
portelli db9c28a773 qed-fvol: Photon parameter name fix 2016-12-20 12:41:39 +01:00
portelli 9ac3ac41df serialisable Photon parameters 2016-12-20 12:41:01 +01:00
portelli 2af9ab9034 old Makefile cleaning 2016-12-20 12:40:26 +01:00
portelli 6f1ea96293 Merge branch 'develop' into feature/qed-fvol 2016-12-20 12:33:02 +01:00
paboyle 3f2d53a994 BGQ assembler beginning 2016-12-20 10:21:26 +00:00
azusayamaguchi df9108154d Debugged 2 versions of assembler; ls vectorised, xyzt vectorised 2016-12-17 23:47:51 +00:00
azusayamaguchi b3e7f600da Partial implementation of 4d vectorisation assembler 2016-12-16 23:50:30 +00:00
azusayamaguchi d4071daf2a Template specialise 2016-12-16 22:28:29 +00:00
azusayamaguchi a2a6329094 AVX512 only for ASM compilation 2016-12-16 22:03:29 +00:00
azusayamaguchi eabc577940 Assembler possibly working 2016-12-16 16:55:36 +00:00
portelli 2e3c5890b6 qed-fvol: build fix 2016-12-15 20:06:46 +00:00
portelli bc6678732f Merge branch 'feature/hadrons' into feature/qed-fvol
# Conflicts:
#	Makefile.am
#	configure.ac
#	lib/qcd/action/gauge/Photon.h
2016-12-15 19:53:00 +00:00
portelli b10ae00c8a Merge commit '6ad73145bc9754a5f26093eee5a34473ba0cff82' into feature/qed-fvol 2016-12-15 19:48:58 +00:00
Azusa Yamaguchi 0cd6b1858c Merge branch 'develop' of https://github.com/paboyle/Grid into feature/staggering 2016-12-14 09:23:22 +00:00
Guido Cossu 0bd296dda4 Adding check of the Dag part in the benchmark 2016-12-14 03:15:09 +00:00
Guido Cossu af0ccdd8e9 Moving output order 2016-12-14 02:02:42 +00:00
Guido Cossu 2fb92dbc6e Cleaning up previous debug lines 2016-12-13 07:53:43 +00:00
Guido Cossu 5c74b6028b Commit for debugging, lot of IO 2016-12-13 06:35:30 +00:00
Guido Cossu e0be2b6e6c Adding a new tests for the Ls vec CG 2016-12-13 04:59:18 +00:00
Guido Cossu ef72f322d2 consistency of tests 2016-12-13 02:24:20 +00:00
Azusa Yamaguchi 426197e446 Nc=3 2016-12-12 09:10:54 +00:00
Azusa Yamaguchi 99e2c1e666 Kernels options 2016-12-12 09:08:53 +00:00
Azusa Yamaguchi 1440565a10 Decrease verbosity 2016-12-12 09:08:04 +00:00
Azusa Yamaguchi e9f0c0ea39 Staggered kernels options 2016-12-12 09:07:38 +00:00
Guido Cossu 7bc2065113 Adding report at the end of the DWF HMC tests 2016-12-12 04:21:34 +00:00
Guido Cossu 2bd4233919 Completed testing of the HMC for Ls vectorised version (on AVX2) 2016-12-07 04:56:37 +00:00
Guido Cossu 143c70e29f Debugged the threaded version. Cleaning up 2016-12-07 04:40:25 +00:00
Guido Cossu b812d5e39c Added single threaded version of the derivative for the Ls vectorised DWF 2016-12-06 16:31:13 +00:00
Guido Cossu 01480da0a8 Merge branch 'develop' into feature/hmc_generalise 2016-12-05 05:10:27 +00:00
James Harrison 6ad73145bc Calculate Wilson loop average over multiple configurations. 2016-11-30 15:17:22 +00:00
Azusa Yamaguchi c097fd041a Merge branch 'develop' of https://github.com/paboyle/Grid into feature/staggering 2016-11-29 13:44:17 +00:00
Azusa Yamaguchi 77fb25fb29 Push 5d tests 2016-11-29 13:43:56 +00:00
Azusa Yamaguchi 389e0a77bd Staggerd Fermion 5D 2016-11-29 13:13:56 +00:00
portelli f7293f2ddb Merge pull request #69 from jch1g10/feature/qed-fvol 2016-11-26 07:04:04 +09:00
Azusa Yamaguchi 95f43d27ae Merge branch 'develop' of https://github.com/paboyle/Grid into feature/staggering 2016-11-22 13:49:22 +00:00
Azusa Yamaguchi 668ca57702 Merge branch 'develop' of https://github.com/paboyle/Grid into feature/staggering 2016-11-22 13:49:11 +00:00
Guido Cossu 62749d05a6 Naming the scalar action 2016-11-17 12:26:20 +00:00
Guido Cossu 3834feb4b7 Adding action names 2016-11-16 16:46:49 +00:00
James Harrison 6b8ee7bae0 Merge branch 'feature/feynman-rules' into feature/qed-fvol 2016-11-15 13:08:08 +00:00
James Harrison 739c2308b5 Set imaginary part of stochastic QED field to zero using real() instead of conjugate(). 2016-11-15 13:07:52 +00:00
Guido Cossu 454302414d Small modif at the test hmc 2016-11-15 12:31:13 +00:00
James Harrison a71b69389b QedFVol: calculate square Wilson loops up to 10x10 2016-11-14 18:23:04 +00:00
James Harrison d49e502f53 Merge branch 'feature/feynman-rules' into feature/qed-fvol 2016-11-14 18:00:33 +00:00
James Harrison 92ec3404f8 Set imaginary part of stochastic QED field to zero after FFT into position space 2016-11-14 17:59:02 +00:00
James Harrison f4ebea3381 QedFVol: add functions for computing spatial and timelike Wilson loops 2016-11-14 17:51:53 +00:00
James Harrison cf167d0cd1 QedFVol: implement exponentiation of photon field 2016-11-14 17:02:29 +00:00
Guido Cossu 6f8b771a37 Adding date of the last commit 2016-11-10 18:52:00 +00:00
Guido Cossu 4e1ffdd17c Adding git info to the configure output 2016-11-10 18:44:36 +00:00
Guido Cossu a783282b8b Merge branch 'develop' into feature/hmc_generalise 2016-11-10 18:13:07 +00:00
Guido Cossu 19b85d8486 Some comments in the hmc files 2016-11-10 17:55:58 +00:00
paboyle c363bdd784 Merge branch 'release/v0.6.0' 2016-11-09 12:43:14 +00:00
James Harrison c30d96ea50 QedFVol: x86intrin.h namespace fix 2016-11-09 11:06:20 +00:00
James Harrison 7ffe17ada1 Merge branch 'feature/feynman-rules' into feature/qed-fvol 2016-11-08 14:52:43 +00:00
Azusa Yamaguchi ee686a7d85 Compiles now 2016-11-03 16:58:23 +00:00
Azusa Yamaguchi 1c5b7a6be5 Staggered phases first cut, c1, c2, u0 2016-11-03 16:26:56 +00:00
portelli 330a9b3f4c Merge pull request #65 from jch1g10/feature/qed-fvol 2016-11-01 19:53:25 +00:00
James Harrison 28ff66a381 Merge branch 'feature/feynman-rules' into feature/qed-fvol 2016-11-01 16:07:46 +00:00
James Harrison 78c7bcee36 QedFVol: Change variables of type "double" to type "Real". 2016-11-01 16:06:05 +00:00
Azusa Yamaguchi 164d3691db Staggered 2016-11-01 14:24:22 +00:00
portelli 00a7b95631 Merge remote-tracking branch 'gh-james/feature/qed-fvol' into feature/qed-fvol 2016-10-31 18:46:23 +00:00
portelli 94d8321d01 Merge branch 'feature/feynman-rules' into feature/qed-fvol 2016-10-31 18:41:30 +00:00
James Harrison ac24cc9f99 Merge branch 'feature/feynman-rules' into feature/qed-fvol 2016-10-29 11:05:26 +01:00
Guido Cossu 1d666771f9 Debugging the RNG, eliminate the barrier after broadcast 2016-10-26 16:08:23 +01:00
Guido Cossu d50055cd96 Making the ILDG support optional 2016-10-26 09:48:01 +01:00
James Harrison 3ab4c8c0bb QedFVol: calculate plaquette and 2x2 Wilson loop of stochastic QED field 2016-10-25 13:32:02 +01:00
Guido Cossu 47c7159177 ILDG reader/writer works
Fill the xml header with the required information, todo.
2016-10-24 21:57:54 +01:00
Guido Cossu f415db583a Adding ILDG format 2016-10-24 15:48:22 +01:00
Guido Cossu f55c16f984 Adding a barrier in the RNG save 2016-10-24 11:02:14 +01:00
Guido Cossu df67e013ca More debug output for the RNG 2016-10-22 13:34:17 +01:00
Guido Cossu 3e990c9d0a Reverting the broadcast change 2016-10-22 13:26:43 +01:00
Guido Cossu 4b740fc8fd Debugging the RNG state save 2016-10-22 13:06:00 +01:00
Guido Cossu cccd14b09e Small cleanup 2016-10-21 17:20:54 +01:00
Guido Cossu e6acffdfc2 Fixing the plaquette computation 2016-10-21 16:06:34 +01:00
portelli 26d124283e Merge branch 'feature/feynman-rules' into feature/qed-fvol 2016-10-21 15:23:31 +01:00
portelli 0d889b7041 QedFVol: first attempt at generating a QED field 2016-10-21 15:21:32 +01:00
portelli ab31ad006a Merge branch 'feature/feynman-rules' into feature/qed-fvol 2016-10-21 14:42:18 +01:00
Guido Cossu 392130a537 Working on the 5d 2016-10-21 14:22:25 +01:00
Guido Cossu deef2673b2 Separating the Lattice theories stub from the QCD.h file 2016-10-20 17:24:08 +01:00
Guido Cossu 977b0a6dd9 Merge branch 'develop' into feature/hmc_generalise 2016-10-20 17:04:41 +01:00
Guido Cossu 977d844394 Few modifications on stdout messages 2016-10-20 17:01:59 +01:00
portelli 6e4a06e180 qed-fvol: initial commit 2016-10-20 15:04:00 +01:00
Guido Cossu 590675e2ca Csum in hex format 2016-10-19 17:26:25 +01:00
Guido Cossu 8c65bdf6d3 Printing checksum for the RNG file 2016-10-19 16:56:11 +01:00
Guido Cossu 74f1ed3bc5 Adding some documentation for HMC 2016-10-19 10:51:13 +01:00
Guido Cossu 79270ef510 Added a test for EODWF Scaled Shamir with general HMC 2016-10-14 17:34:26 +01:00
Guido Cossu e250e6b7bb Moving parameters outside of the HMCrunner 2016-10-14 17:22:32 +01:00
Guido Cossu 261342c15f Adding gh-pages 2016-10-13 11:51:25 +01:00
Guido Cossu eda4dd622e Some more edit 2016-10-11 15:45:20 +01:00
Guido Cossu c68a2b9637 Minor fix 2016-10-10 11:54:58 +01:00
Guido Cossu 293df6cd20 Generalising the HMCRunner and moving parameters to the user level 2016-10-10 11:49:55 +01:00
Guido Cossu 65f61bb3bf Reset QCD colours to 3 2016-10-10 09:46:17 +01:00
Guido Cossu 26b9740d53 Some fix for the GenericHMCrunner 2016-10-10 09:43:05 +01:00
Guido Cossu 6eb873dd96 Added scalar action phi^4
Check Norm2 output (Complex type assumption)
2016-10-07 17:28:46 +01:00
Guido Cossu 11b4c80b27 Added support for hmc and binary IO for a general field 2016-10-07 13:37:29 +01:00
Guido Cossu c065e454c3 Adding Binrary IO, untested 2016-10-06 10:12:11 +01:00
Guido Cossu d9b5fbd374 In the middle of adding a general binary writer 2016-10-04 11:24:08 +01:00
Guido Cossu cfbc1a26b8 Now the gauge implementation has to take care of the Nexp 2016-10-03 16:20:06 +01:00
Guido Cossu 257f69f931 One more function to generalise the HMC integrator 2016-10-03 15:50:04 +01:00
Guido Cossu e415260961 First cut on generalised HMC
Backward compatibility OK
2016-10-03 15:28:00 +01:00
paboyle 446c768cd3 Merge branch 'hotfix/v0.5.1'
Double precision compile fix
2016-07-01 16:33:59 +01:00
469 changed files with 76301 additions and 12926 deletions
+7 -1
View File
@@ -92,6 +92,7 @@ build*/*
#####################
*.xcodeproj/*
build.sh
.vscode
# Eigen source #
################
@@ -106,6 +107,10 @@ lib/fftw/*
m4/lt*
m4/libtool.m4
# github pages #
################
gh-pages/
# Buck files #
##############
.buck*
@@ -116,4 +121,5 @@ make-bin-BUCK.sh
# generated sources #
#####################
lib/qcd/spin/gamma-gen/*.h
lib/qcd/spin/gamma-gen/*.cc
lib/qcd/spin/gamma-gen/*.cc
+8 -68
View File
@@ -7,64 +7,8 @@ cache:
matrix:
include:
- os: osx
osx_image: xcode7.2
osx_image: xcode8.3
compiler: clang
- compiler: gcc
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-4.9
- libmpfr-dev
- libgmp-dev
- libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev
env: VERSION=-4.9
- compiler: gcc
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-5
- libmpfr-dev
- libgmp-dev
- libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev
env: VERSION=-5
- compiler: clang
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-4.8
- libmpfr-dev
- libgmp-dev
- libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev
env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
- compiler: clang
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-4.8
- libmpfr-dev
- libgmp-dev
- libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev
env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
before_install:
- export GRIDDIR=`pwd`
@@ -73,13 +17,15 @@ before_install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
install:
- export CC=$CC$VERSION
- export CXX=$CXX$VERSION
- echo $PATH
- which autoconf
- autoconf --version
- which automake
- automake --version
- which $CC
- $CC --version
- which $CXX
@@ -92,15 +38,9 @@ script:
- cd build
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
- make -j4
- ./benchmarks/Benchmark_dwf --threads 1
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
- echo make clean
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
- make -j4
- ./benchmarks/Benchmark_dwf --threads 1
- echo make clean
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto
- make -j4
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
- make check
+8 -3
View File
@@ -3,10 +3,15 @@ SUBDIRS = lib benchmarks tests extras
include $(top_srcdir)/doxygen.inc
tests: all
$(MAKE) -C tests tests
bin_SCRIPTS=grid-config
.PHONY: tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL)
.PHONY: bench check tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL)
tests-local: all
bench-local: all
check-local: all
AM_CXXFLAGS += -I$(top_builddir)/include
ACLOCAL_AMFLAGS = -I m4
+256 -46
View File
@@ -1,41 +1,13 @@
# Grid
<table>
<tr>
<td>Last stable release</td>
<td><a href="https://travis-ci.org/paboyle/Grid">
<img src="https://travis-ci.org/paboyle/Grid.svg?branch=master"></a>
</td>
</tr>
<tr>
<td>Development branch</td>
<td><a href="https://travis-ci.org/paboyle/Grid">
<img src="https://travis-ci.org/paboyle/Grid.svg?branch=develop"></a>
</td>
</tr>
</table>
# Grid [![Teamcity status](http://ci.cliath.ph.ed.ac.uk/app/rest/builds/aggregated/strob:(buildType:(affectedProject(id:Grid)),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=Grid&tab=projectOverview) [![Travis status](https://travis-ci.org/paboyle/Grid.svg?branch=develop)](https://travis-ci.org/paboyle/Grid)
**Data parallel C++ mathematical object library.**
License: GPL v2.
Last update Nov 2016.
Last update June 2017.
_Please do not send pull requests to the `master` branch which is reserved for releases._
### Bug report
_To help us tracking and solving more efficiently issues with Grid, please report problems using the issue system of GitHub rather than sending emails to Grid developers._
When you file an issue, please go though the following checklist:
1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.
2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output the `--version` option of your compiler.
3. Give the exact `configure` command used.
4. Attach `config.log`.
5. Attach `config.summary`.
6. Attach the output of `make V=1`.
7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example.
### Description
@@ -58,13 +30,68 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi
for most programmers.
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
Presently SSE4 (128 bit) AVX, AVX2, QPX (256 bit), IMCI, and AVX512 (512 bit) targets are supported (ARM NEON on the way).
Presently SSE4, ARM NEON (128 bits) AVX, AVX2, QPX (256 bits), IMCI and AVX512 (512 bits) targets are supported.
These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers.
These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types.
The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`.
MPI, OpenMP, and SIMD parallelism are present in the library.
Please see https://arxiv.org/abs/1512.03487 for more detail.
Please see [this paper](https://arxiv.org/abs/1512.03487) for more detail.
### Compilers
Intel ICPC v16.0.3 and later
Clang v3.5 and later (need 3.8 and later for OpenMP)
GCC v4.9.x (recommended)
GCC v6.3 and later
### Important:
Some versions of GCC appear to have a bug under high optimisation (-O2, -O3).
The safety of these compiler versions cannot be guaranteed at this time. Follow Issue 100 for details and updates.
GCC v5.x
GCC v6.1, v6.2
### Bug report
_To help us tracking and solving more efficiently issues with Grid, please report problems using the issue system of GitHub rather than sending emails to Grid developers._
When you file an issue, please go though the following checklist:
1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.
2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output the `--version` option of your compiler.
3. Give the exact `configure` command used.
4. Attach `config.log`.
5. Attach `grid.config.summary`.
6. Attach the output of `make V=1`.
7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example.
### Required libraries
Grid requires:
[GMP](https://gmplib.org/),
[MPFR](http://www.mpfr.org/)
Bootstrapping grid downloads and uses for internal dense matrix (non-QCD operations) the Eigen library.
Grid optionally uses:
[HDF5](https://support.hdfgroup.org/HDF5/)
[LIME](http://usqcd-software.github.io/c-lime/) for ILDG and SciDAC file format support.
[FFTW](http://www.fftw.org) either generic version or via the Intel MKL library.
LAPACK either generic version or Intel MKL library.
### Quick start
First, start by cloning the repository:
@@ -95,10 +122,10 @@ install Grid. Other options are detailed in the next section, you can also use `
`CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to
customise the build.
Finally, you can build and install Grid:
Finally, you can build, check, and install Grid:
``` bash
make; make install
make; make check; make install
```
To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute:
@@ -121,7 +148,7 @@ If you want to build all the tests at once just use `make tests`.
- `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes).
- `--enable-precision={single|double}`: set the default precision (default: `double`).
- `--enable-precision=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below.
- `--enable-rng={ranlux48|mt19937}`: choose the RNG (default: `ranlux48 `).
- `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `).
- `--disable-timers`: disable system dependent high-resolution timers.
- `--enable-chroma`: enable Chroma regression tests.
- `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`)
@@ -135,7 +162,6 @@ The following options can be use with the `--enable-comms=` option to target dif
| `none` | no communications |
| `mpi[-auto]` | MPI communications |
| `mpi3[-auto]` | MPI communications using MPI 3 shared memory |
| `mpi3l[-auto]` | MPI communications using MPI 3 shared memory and leader model |
| `shmem ` | Cray SHMEM communications |
For the MPI interfaces the optional `-auto` suffix instructs the `configure` scripts to determine all the necessary compilation and linking flags. This is done by extracting the informations from the MPI wrapper specified in the environment variable `MPICXX` (if not specified `configure` will scan though a list of default names). The `-auto` suffix is not supported by the Cray environment wrapper scripts. Use the standard versions instead.
@@ -153,13 +179,13 @@ The following options can be use with the `--enable-simd=` option to target diff
| `AVXFMA4` | AVX (256 bit) + FMA4 |
| `AVX2` | AVX 2 (256 bit) |
| `AVX512` | AVX 512 bit |
| `QPX` | QPX (256 bit) |
| `NEONv8` | [ARM NEON](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/ch07s03.html) (128 bit) |
| `QPX` | IBM QPX (256 bit) |
Alternatively, some CPU codenames can be directly used:
| `<code>` | Description |
| ----------- | -------------------------------------- |
| `KNC` | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) |
| `KNL` | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) |
| `BGQ` | Blue Gene/Q |
@@ -176,21 +202,205 @@ The following configuration is recommended for the Intel Knights Landing platfor
``` bash
../configure --enable-precision=double\
--enable-simd=KNL \
--enable-comms=mpi-auto \
--with-gmp=<path> \
--with-mpfr=<path> \
--enable-comms=mpi-auto \
--enable-mkl \
CXX=icpc MPICXX=mpiicpc
```
The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.
where `<path>` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
``` bash
../configure --enable-precision=double\
--enable-simd=KNL \
--enable-comms=mpi \
--with-gmp=<path> \
--with-mpfr=<path> \
--enable-mkl \
CXX=CC CC=cc
```
```
If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed:
``` bash
--with-gmp=<path> \
--with-mpfr=<path> \
```
where `<path>` is the UNIX prefix where GMP and MPFR are installed.
Knight's Landing with Intel Omnipath adapters with two adapters per node
presently performs better with use of more than one rank per node, using shared memory
for interior communication. This is the mpi3 communications implementation.
We recommend four ranks per node for best performance, but optimum is local volume dependent.
``` bash
../configure --enable-precision=double\
--enable-simd=KNL \
--enable-comms=mpi3-auto \
--enable-mkl \
CC=icpc MPICXX=mpiicpc
```
### Build setup for Intel Haswell Xeon platform
The following configuration is recommended for the Intel Haswell platform:
``` bash
../configure --enable-precision=double\
--enable-simd=AVX2 \
--enable-comms=mpi3-auto \
--enable-mkl \
CXX=icpc MPICXX=mpiicpc
```
The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.
If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed:
``` bash
--with-gmp=<path> \
--with-mpfr=<path> \
```
where `<path>` is the UNIX prefix where GMP and MPFR are installed.
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
``` bash
../configure --enable-precision=double\
--enable-simd=AVX2 \
--enable-comms=mpi3 \
--enable-mkl \
CXX=CC CC=cc
```
Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of
one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using
```
export I_MPI_PIN=1
```
This is the default.
### Build setup for Intel Skylake Xeon platform
The following configuration is recommended for the Intel Skylake platform:
``` bash
../configure --enable-precision=double\
--enable-simd=AVX512 \
--enable-comms=mpi3 \
--enable-mkl \
CXX=mpiicpc
```
The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.
If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed:
``` bash
--with-gmp=<path> \
--with-mpfr=<path> \
```
where `<path>` is the UNIX prefix where GMP and MPFR are installed.
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
``` bash
../configure --enable-precision=double\
--enable-simd=AVX512 \
--enable-comms=mpi3 \
--enable-mkl \
CXX=CC CC=cc
```
Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of
one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using
```
export I_MPI_PIN=1
```
This is the default.
#### Expected Skylake Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping):
mpirun -n 2 benchmarks/Benchmark_dwf --grid 16.16.16.16 --mpi 2.1.1.1 --cacheblocking 2.2.2.2 --dslash-asm --shm 1024 --threads 18
TBA
### Build setup for AMD EPYC / RYZEN
The AMD EPYC is a multichip module comprising 32 cores spread over four distinct chips each with 8 cores.
So, even with a single socket node there is a quad-chip module. Dual socket nodes with 64 cores total
are common. Each chip within the module exposes a separate NUMA domain.
There are four NUMA domains per socket and we recommend one MPI rank per NUMA domain.
MPI-3 is recommended with the use of four ranks per socket,
and 8 threads per rank.
The following configuration is recommended for the AMD EPYC platform.
``` bash
../configure --enable-precision=double\
--enable-simd=AVX2 \
--enable-comms=mpi3 \
CXX=mpicxx
```
If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed:
``` bash
--with-gmp=<path> \
--with-mpfr=<path> \
```
where `<path>` is the UNIX prefix where GMP and MPFR are installed.
Using MPICH and g++ v4.9.2, best performance can be obtained using explicit GOMP_CPU_AFFINITY flags for each MPI rank.
This can be done by invoking MPI on a wrapper script omp_bind.sh to handle this.
It is recommended to run 8 MPI ranks on a single dual socket AMD EPYC, with 8 threads per rank using MPI3 and
shared memory to communicate within this node:
mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --mpi 2.2.2.1 --dslash-unroll --threads 8 --grid 16.16.16.16 --cacheblocking 4.4.4.4
Where omp_bind.sh does the following:
```
#!/bin/bash
numanode=` expr $PMI_RANK % 8 `
basecore=`expr $numanode \* 16`
core0=`expr $basecore + 0 `
core1=`expr $basecore + 2 `
core2=`expr $basecore + 4 `
core3=`expr $basecore + 6 `
core4=`expr $basecore + 8 `
core5=`expr $basecore + 10 `
core6=`expr $basecore + 12 `
core7=`expr $basecore + 14 `
export GOMP_CPU_AFFINITY="$core0 $core1 $core2 $core3 $core4 $core5 $core6 $core7"
echo GOMP_CUP_AFFINITY $GOMP_CPU_AFFINITY
$@
```
Performance:
#### Expected AMD EPYC 7601 dual socket (single prec, single node 32+32 cores) performance using NUMA MPI mapping):
mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --threads 8 --mpi 2.2.2.1 --dslash-unroll --grid 16.16.16.16 --cacheblocking 4.4.4.4
TBA
### Build setup for BlueGene/Q
To be written...
### Build setup for ARM Neon
To be written...
### Build setup for laptops, other compilers, non-cluster builds
Many versions of g++ and clang++ work with Grid, and involve merely replacing CXX (and MPICXX),
and omit the enable-mkl flag.
Single node builds are enabled with
```
--enable-comms=none
```
FFTW support that is not in the default search path may then enabled with
```
--with-fftw=<installpath>
```
BLAS will not be compiled in by default, and Lanczos will default to Eigen diagonalisation.
+56 -14
View File
@@ -1,6 +1,35 @@
TODO:
---------------
Large item work list:
1)- BG/Q port and check ; Andrew says ok.
2)- Christoph's local basis expansion Lanczos
--
3a)- RNG I/O in ILDG/SciDAC (minor)
3b)- Precision conversion and sort out localConvert <-- partial/easy
3c)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
4)- Physical propagator interface
5)- Conserved currents
6)- Multigrid Wilson and DWF, compare to other Multigrid implementations
7)- HDCR resume
Recent DONE
-- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O ; <-- DONE ; bmark cori
-- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE
-- GaugeFix into central location <-- DONE
-- Scidac and Ildg metadata handling <-- DONE
-- Binary I/O MPI2 IO <-- DONE
-- Binary I/O speed up & x-strips <-- DONE
-- Cut down the exterior overhead <-- DONE
-- Interior legs from SHM comms <-- DONE
-- Half-precision comms <-- DONE
-- Merge high precision reduction into develop <-- DONE
-- BlockCG, BCGrQ <-- DONE
-- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE
-- slice* linalg routines for multiRHS, BlockCG
-----
* Forces; the UdSdU term in gauge force term is half of what I think it should
be. This is a consequence of taking ONLY the first term in:
@@ -21,16 +50,8 @@ TODO:
This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two.
This 2x is applied by hand in the fermion routines and in the Test_rect_force routine.
Policies:
* Link smearing/boundary conds; Policy class based implementation ; framework more in place
* Support different boundary conditions (finite temp, chem. potential ... )
* Support different fermion representations?
- contained entirely within the integrator presently
- Sign of force term.
- Reversibility test.
@@ -41,11 +62,6 @@ Policies:
- Audit oIndex usage for cb behaviour
- Rectangle gauge actions.
Iwasaki,
Symanzik,
... etc...
- Prepare multigrid for HMC. - Alternate setup schemes.
- Support for ILDG --- ugly, not done
@@ -55,9 +71,11 @@ Policies:
- FFTnD ?
- Gparity; hand opt use template specialisation elegance to enable the optimised paths ?
- Gparity force term; Gparity (R)HMC.
- Random number state save restore
- Mobius implementation clean up to rmove #if 0 stale code sequences
- CG -- profile carefully, kernel fusion, whole CG performance measurements.
================================================================
@@ -90,6 +108,7 @@ Insert/Extract
Not sure of status of this -- reverify. Things are working nicely now though.
* Make the Tensor types and Complex etc... play more nicely.
- TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I
want to introduce a syntax that does not require this.
@@ -112,6 +131,8 @@ Not sure of status of this -- reverify. Things are working nicely now though.
RECENT
---------------
- Support different fermion representations? -- DONE
- contained entirely within the integrator presently
- Clean up HMC -- DONE
- LorentzScalar<GaugeField> gets Gauge link type (cleaner). -- DONE
- Simplified the integrators a bit. -- DONE
@@ -123,6 +144,26 @@ RECENT
- Parallel io improvements -- DONE
- Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE
DONE:
- MultiArray -- MultiRHS done
- ConjugateGradientMultiShift -- DONE
- MCR -- DONE
- Remez -- Mike or Boost? -- DONE
- Proto (ET) -- DONE
- uBlas -- DONE ; Eigen
- Potentially Useful Boost libraries -- DONE ; Eigen
- Aligned allocator; memory pool -- DONE
- Multiprecision -- DONE
- Serialization -- DONE
- Regex -- Not needed
- Tokenize -- Why?
- Random number state save restore -- DONE
- Rectangle gauge actions. -- DONE
Iwasaki,
Symanzik,
... etc...
Done: Cayley, Partial , ContFrac force terms.
DONE
@@ -207,6 +248,7 @@ Done
FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane)
======================================================================================================
* Link smearing/boundary conds; Policy class based implementation ; framework more in place -- DONE
* Command line args for geometry, simd, etc. layout. Is it necessary to have -- DONE
user pass these? Is this a QCD specific?
+4 -5
View File
@@ -1,6 +1,5 @@
Version : 0.6.0
Version : 0.7.0
- AVX512, AVX2, AVX, SSE good
- Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above
- MPI and MPI3
- HiRep, Smearing, Generic gauge group
- Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended
- MPI and MPI3 comms optimisations for KNL and OPA finished
- Half precision comms
+800
View File
@@ -0,0 +1,800 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_memory_bandwidth.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF;
typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD;
std::vector<int> L_list;
std::vector<int> Ls_list;
std::vector<double> mflop_list;
double mflop_ref;
double mflop_ref_err;
int NN_global;
struct time_statistics{
double mean;
double err;
double min;
double max;
void statistics(std::vector<double> v){
double sum = std::accumulate(v.begin(), v.end(), 0.0);
mean = sum / v.size();
std::vector<double> diff(v.size());
std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; });
double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
err = std::sqrt(sq_sum / (v.size()*(v.size() - 1)));
auto result = std::minmax_element(v.begin(), v.end());
min = *result.first;
max = *result.second;
}
};
void comms_header(){
std::cout <<GridLogMessage << " L "<<"\t"<<" Ls "<<"\t"
<<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl;
};
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT
};
struct controls {
int Opt;
int CommsOverlap;
Grid::CartesianCommunicator::CommunicatorPolicy_t CommsAsynch;
// int HugePages;
};
class Benchmark {
public:
static void Decomposition (void ) {
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Grid is setup to use "<<threads<<" threads"<<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n";
std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl;
std::cout<<GridLogMessage<<"\tMPI tasks : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl;
std::cout<<GridLogMessage<<"\tvReal : "<<sizeof(vReal )*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vReal::Nsimd()))<<std::endl;
std::cout<<GridLogMessage<<"\tvRealF : "<<sizeof(vRealF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl;
std::cout<<GridLogMessage<<"\tvRealD : "<<sizeof(vRealD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealD::Nsimd()))<<std::endl;
std::cout<<GridLogMessage<<"\tvComplex : "<<sizeof(vComplex )*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplex::Nsimd()))<<std::endl;
std::cout<<GridLogMessage<<"\tvComplexF : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl;
std::cout<<GridLogMessage<<"\tvComplexD : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
}
static void Comms(void)
{
int Nloop=200;
int nmu=0;
int maxlat=32;
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++;
std::vector<double> t_time(Nloop);
time_statistics timestat;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
comms_header();
for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=8;Ls*=2){
std::vector<int> latt_size ({lat*mpi_layout[0],
lat*mpi_layout[1],
lat*mpi_layout[2],
lat*mpi_layout[3]});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
RealD Nrank = Grid._Nprocessors;
RealD Nnode = Grid.NodeCount();
RealD ppn = Nrank/Nnode;
std::vector<HalfSpinColourVectorD *> xbuf(8);
std::vector<HalfSpinColourVectorD *> rbuf(8);
Grid.ShmBufferFreeAll();
for(int d=0;d<8;d++){
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
}
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
int ncomm;
double dbytes;
std::vector<double> times(Nloop);
for(int i=0;i<Nloop;i++){
double start=usecond();
dbytes=0;
ncomm=0;
parallel_for(int dir=0;dir<8;dir++){
double tbytes;
int mu =dir % 4;
if (mpi_layout[mu]>1 ) {
int xmit_to_rank;
int recv_from_rank;
if ( dir == mu ) {
int comm_proc=1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
} else {
int comm_proc = mpi_layout[mu]-1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
}
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
(void *)&rbuf[dir][0], recv_from_rank,
bytes,dir);
#ifdef GRID_OMP
#pragma omp atomic
#endif
ncomm++;
#ifdef GRID_OMP
#pragma omp atomic
#endif
dbytes+=tbytes;
}
}
Grid.Barrier();
double stop=usecond();
t_time[i] = stop-start; // microseconds
}
timestat.statistics(t_time);
// for(int i=0;i<t_time.size();i++){
// std::cout << i<<" "<<t_time[i]<<std::endl;
// }
dbytes=dbytes*ppn;
double xbytes = dbytes*0.5;
double rbytes = dbytes*0.5;
double bidibytes = dbytes;
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
}
}
return;
}
static void Memory(void)
{
const int Nvec=8;
typedef Lattice< iVector< vReal,Nvec> > LatticeVec;
typedef iVector<vReal,Nvec> Vec;
std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking a*x + y bandwidth"<<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<< "\t\tGB/s / node"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
uint64_t NP;
uint64_t NN;
uint64_t lmax=48;
#define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
for(int lat=8;lat<=lmax;lat+=4){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
NP= Grid.RankCount();
NN =Grid.NodeCount();
Vec rn ; random(sRNG,rn);
LatticeVec z(&Grid); z=rn;
LatticeVec x(&Grid); x=rn;
LatticeVec y(&Grid); y=rn;
double a=2.0;
uint64_t Nloop=NLOOP;
double start=usecond();
for(int i=0;i<Nloop;i++){
z=a*x-y;
x._odata[0]=z._odata[0]; // force serial dependency to prevent optimise away
y._odata[4]=z._odata[4];
}
double stop=usecond();
double time = (stop-start)/Nloop*1000;
double flops=vol*Nvec*2;// mul,add
double bytes=3.0*vol*Nvec*sizeof(Real);
std::cout<<GridLogMessage<<std::setprecision(3)
<< lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.
<< "\t\t"<< bytes/time/NN <<std::endl;
}
};
static double DWF5(int Ls,int L)
{
RealD mass=0.1;
RealD M5 =1.8;
double mflops;
double mflops_best = 0;
double mflops_worst= 0;
std::vector<double> mflops_all;
///////////////////////////////////////////////////////
// Set/Get the layout & grid size
///////////////////////////////////////////////////////
int threads = GridThread::GetThreads();
std::vector<int> mpi = GridDefaultMpi(); assert(mpi.size()==4);
std::vector<int> local({L,L,L,L});
GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(std::vector<int>({64,64,64,64}),
GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
uint64_t NP = TmpGrid->RankCount();
uint64_t NN = TmpGrid->NodeCount();
NN_global=NN;
uint64_t SHM=NP/NN;
std::vector<int> internal;
if ( SHM == 1 ) internal = std::vector<int>({1,1,1,1});
else if ( SHM == 2 ) internal = std::vector<int>({2,1,1,1});
else if ( SHM == 4 ) internal = std::vector<int>({2,2,1,1});
else if ( SHM == 8 ) internal = std::vector<int>({2,2,2,1});
else assert(0);
std::vector<int> nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]});
std::vector<int> latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]});
///////// Welcome message ////////////
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << "Benchmark DWF Ls vec on "<<L<<"^4 local volume "<<std::endl;
std::cout<<GridLogMessage << "* Global volume : "<<GridCmdVectorIntToString(latt4)<<std::endl;
std::cout<<GridLogMessage << "* Ls : "<<Ls<<std::endl;
std::cout<<GridLogMessage << "* MPI ranks : "<<GridCmdVectorIntToString(mpi)<<std::endl;
std::cout<<GridLogMessage << "* Intranode : "<<GridCmdVectorIntToString(internal)<<std::endl;
std::cout<<GridLogMessage << "* nodes : "<<GridCmdVectorIntToString(nodes)<<std::endl;
std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
///////// Lattice Init ////////////
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi());
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
///////// RNG Init ////////////
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
GridParallelRNG RNG5(sFGrid); RNG5.SeedFixedIntegers(seeds5);
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
///////// Source preparation ////////////
LatticeFermion src (sFGrid); random(RNG5,src);
LatticeFermion tmp (sFGrid);
RealD N2 = 1.0/::sqrt(norm2(src));
src = src*N2;
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
LatticeFermion src_e (sFrbGrid);
LatticeFermion src_o (sFrbGrid);
LatticeFermion r_e (sFrbGrid);
LatticeFermion r_o (sFrbGrid);
LatticeFermion r_eo (sFGrid);
LatticeFermion err (sFGrid);
{
pickCheckerboard(Even,src_e,src);
pickCheckerboard(Odd,src_o,src);
#if defined(AVX512)
const int num_cases = 6;
std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O ");
#else
const int num_cases = 4;
std::string fmt("U/S ; U/O ; G/S ; G/O ");
#endif
controls Cases [] = {
#ifdef AVX512
{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential },
#endif
{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential },
{ QCD::WilsonKernelsStatic::OptGeneric , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
{ QCD::WilsonKernelsStatic::OptGeneric , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }
};
for(int c=0;c<num_cases;c++) {
QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
QCD::WilsonKernelsStatic::Opt = Cases[c].Opt;
CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
int nwarm = 100;
uint64_t ncall = 1000;
double t0=usecond();
sFGrid->Barrier();
for(int i=0;i<nwarm;i++){
sDw.DhopEO(src_o,r_e,DaggerNo);
}
sFGrid->Barrier();
double t1=usecond();
sDw.ZeroCounters();
time_statistics timestat;
std::vector<double> t_time(ncall);
for(uint64_t i=0;i<ncall;i++){
t0=usecond();
sDw.DhopEO(src_o,r_e,DaggerNo);
t1=usecond();
t_time[i] = t1-t0;
}
sFGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume)/2;
double mf_hi, mf_lo, mf_err;
timestat.statistics(t_time);
mf_hi = flops/timestat.min;
mf_lo = flops/timestat.max;
mf_err= flops/timestat.min * timestat.err/timestat.mean;
mflops = flops/timestat.mean;
mflops_all.push_back(mflops);
if ( mflops_best == 0 ) mflops_best = mflops;
if ( mflops_worst== 0 ) mflops_worst= mflops;
if ( mflops>mflops_best ) mflops_best = mflops;
if ( mflops<mflops_worst) mflops_worst= mflops;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per rank "<< mflops/NP<<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per node "<< mflops/NN<<std::endl;
sDw.Report();
}
double robust = mflops_worst/mflops_best;;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Best mflop/s = "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl;
std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Worst mflop/s = "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl;
std::cout<<GridLogMessage <<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness = "<< robust <<std::endl;
std::cout<<GridLogMessage <<fmt << std::endl;
std::cout<<GridLogMessage;
for(int i=0;i<mflops_all.size();i++){
std::cout<<mflops_all[i]/NN<<" ; " ;
}
std::cout<<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
}
return mflops_best;
}
static double DWF(int Ls,int L, double & robust)
{
RealD mass=0.1;
RealD M5 =1.8;
double mflops;
double mflops_best = 0;
double mflops_worst= 0;
std::vector<double> mflops_all;
///////////////////////////////////////////////////////
// Set/Get the layout & grid size
///////////////////////////////////////////////////////
int threads = GridThread::GetThreads();
std::vector<int> mpi = GridDefaultMpi(); assert(mpi.size()==4);
std::vector<int> local({L,L,L,L});
GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(std::vector<int>({64,64,64,64}),
GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
uint64_t NP = TmpGrid->RankCount();
uint64_t NN = TmpGrid->NodeCount();
NN_global=NN;
uint64_t SHM=NP/NN;
std::vector<int> internal;
if ( SHM == 1 ) internal = std::vector<int>({1,1,1,1});
else if ( SHM == 2 ) internal = std::vector<int>({2,1,1,1});
else if ( SHM == 4 ) internal = std::vector<int>({2,2,1,1});
else if ( SHM == 8 ) internal = std::vector<int>({2,2,2,1});
else assert(0);
std::vector<int> nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]});
std::vector<int> latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]});
///////// Welcome message ////////////
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << "Benchmark DWF on "<<L<<"^4 local volume "<<std::endl;
std::cout<<GridLogMessage << "* Global volume : "<<GridCmdVectorIntToString(latt4)<<std::endl;
std::cout<<GridLogMessage << "* Ls : "<<Ls<<std::endl;
std::cout<<GridLogMessage << "* MPI ranks : "<<GridCmdVectorIntToString(mpi)<<std::endl;
std::cout<<GridLogMessage << "* Intranode : "<<GridCmdVectorIntToString(internal)<<std::endl;
std::cout<<GridLogMessage << "* nodes : "<<GridCmdVectorIntToString(nodes)<<std::endl;
std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
///////// Lattice Init ////////////
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
///////// RNG Init ////////////
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
///////// Source preparation ////////////
LatticeFermion src (FGrid); random(RNG5,src);
LatticeFermion ref (FGrid);
LatticeFermion tmp (FGrid);
RealD N2 = 1.0/::sqrt(norm2(src));
src = src*N2;
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
////////////////////////////////////
// Naive wilson implementation
////////////////////////////////////
{
LatticeGaugeField Umu5d(FGrid);
std::vector<LatticeColourMatrix> U(4,FGrid);
for(int ss=0;ss<Umu._grid->oSites();ss++){
for(int s=0;s<Ls;s++){
Umu5d._odata[Ls*ss+s] = Umu._odata[ss];
}
}
ref = zero;
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
}
for(int mu=0;mu<Nd;mu++){
tmp = U[mu]*Cshift(src,mu+1,1);
ref=ref + tmp - Gamma(Gmu[mu])*tmp;
tmp =adj(U[mu])*src;
tmp =Cshift(tmp,mu+1,-1);
ref=ref + tmp + Gamma(Gmu[mu])*tmp;
}
ref = -0.5*ref;
}
LatticeFermion src_e (FrbGrid);
LatticeFermion src_o (FrbGrid);
LatticeFermion r_e (FrbGrid);
LatticeFermion r_o (FrbGrid);
LatticeFermion r_eo (FGrid);
LatticeFermion err (FGrid);
{
pickCheckerboard(Even,src_e,src);
pickCheckerboard(Odd,src_o,src);
#if defined(AVX512)
const int num_cases = 6;
std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O ");
#else
const int num_cases = 4;
std::string fmt("U/S ; U/O ; G/S ; G/O ");
#endif
controls Cases [] = {
#ifdef AVX512
{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential },
#endif
{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential },
{ QCD::WilsonKernelsStatic::OptGeneric , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
{ QCD::WilsonKernelsStatic::OptGeneric , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }
};
for(int c=0;c<num_cases;c++) {
QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
QCD::WilsonKernelsStatic::Opt = Cases[c].Opt;
CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
int nwarm = 200;
double t0=usecond();
FGrid->Barrier();
for(int i=0;i<nwarm;i++){
Dw.DhopEO(src_o,r_e,DaggerNo);
}
FGrid->Barrier();
double t1=usecond();
// uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0);
// if (ncall < 500) ncall = 500;
uint64_t ncall = 1000;
FGrid->Broadcast(0,&ncall,sizeof(ncall));
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
Dw.ZeroCounters();
time_statistics timestat;
std::vector<double> t_time(ncall);
for(uint64_t i=0;i<ncall;i++){
t0=usecond();
Dw.DhopEO(src_o,r_e,DaggerNo);
t1=usecond();
t_time[i] = t1-t0;
}
FGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume)/2;
double mf_hi, mf_lo, mf_err;
timestat.statistics(t_time);
mf_hi = flops/timestat.min;
mf_lo = flops/timestat.max;
mf_err= flops/timestat.min * timestat.err/timestat.mean;
mflops = flops/timestat.mean;
mflops_all.push_back(mflops);
if ( mflops_best == 0 ) mflops_best = mflops;
if ( mflops_worst== 0 ) mflops_worst= mflops;
if ( mflops>mflops_best ) mflops_best = mflops;
if ( mflops<mflops_worst) mflops_worst= mflops;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank "<< mflops/NP<<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node "<< mflops/NN<<std::endl;
Dw.Report();
Dw.DhopEO(src_o,r_e,DaggerNo);
Dw.DhopOE(src_e,r_o,DaggerNo);
setCheckerboard(r_eo,r_o);
setCheckerboard(r_eo,r_e);
err = r_eo-ref;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
assert((norm2(err)<1.0e-4));
}
robust = mflops_worst/mflops_best;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Best mflop/s = "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl;
std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Worst mflop/s = "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl;
std::cout<<GridLogMessage << std::fixed<<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness = "<< robust <<std::endl;
std::cout<<GridLogMessage <<fmt << std::endl;
std::cout<<GridLogMessage ;
for(int i=0;i<mflops_all.size();i++){
std::cout<<mflops_all[i]/NN<<" ; " ;
}
std::cout<<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
}
return mflops_best;
}
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential);
#ifdef KNL
LebesgueOrder::Block = std::vector<int>({8,2,2,2});
#else
LebesgueOrder::Block = std::vector<int>({2,2,2,2});
#endif
Benchmark::Decomposition();
int do_memory=1;
int do_comms =1;
int do_su3 =0;
int do_wilson=1;
int do_dwf =1;
if ( do_su3 ) {
// empty for now
}
#if 1
int sel=2;
std::vector<int> L_list({8,12,16,24});
#else
int sel=1;
std::vector<int> L_list({8,12});
#endif
int selm1=sel-1;
std::vector<double> robust_list;
std::vector<double> wilson;
std::vector<double> dwf4;
std::vector<double> dwf5;
if ( do_wilson ) {
int Ls=1;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " Wilson dslash 4D vectorised" <<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
for(int l=0;l<L_list.size();l++){
double robust;
wilson.push_back(Benchmark::DWF(1,L_list[l],robust));
}
}
int Ls=16;
if ( do_dwf ) {
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
for(int l=0;l<L_list.size();l++){
double robust;
double result = Benchmark::DWF(Ls,L_list[l],robust) ;
dwf4.push_back(result);
robust_list.push_back(robust);
}
}
if ( do_dwf ) {
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
for(int l=0;l<L_list.size();l++){
dwf5.push_back(Benchmark::DWF5(Ls,L_list[l]));
}
}
if ( do_dwf ) {
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " Summary table Ls="<<Ls <<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << "L \t\t Wilson \t DWF4 \t DWF5 " <<std::endl;
for(int l=0;l<L_list.size();l++){
std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]<<" \t "<<dwf4[l]<<" \t "<<dwf5[l] <<std::endl;
}
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
}
int NN=NN_global;
if ( do_memory ) {
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " Memory benchmark " <<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
Benchmark::Memory();
}
if ( do_comms && (NN>1) ) {
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " Communications benchmark " <<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
Benchmark::Comms();
}
if ( do_dwf ) {
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " Per Node Summary table Ls="<<Ls <<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L \t\t Wilson\t\t DWF4 \t\t DWF5 " <<std::endl;
for(int l=0;l<L_list.size();l++){
std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]/NN<<" \t "<<dwf4[l]/NN<<" \t "<<dwf5[l] /NN<<std::endl;
}
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " Comparison point result: " << 0.5*(dwf4[sel]+dwf4[selm1])/NN << " Mflop/s per node"<<std::endl;
std::cout<<GridLogMessage << " Comparison point is 0.5*("<<dwf4[sel]/NN<<"+"<<dwf4[selm1]/NN << ") "<<std::endl;
std::cout<<std::setprecision(3);
std::cout<<GridLogMessage << " Comparison point robustness: " << robust_list[sel] <<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
}
Grid_finalize();
}
+266 -86
View File
@@ -31,6 +31,32 @@ using namespace std;
using namespace Grid;
using namespace Grid::QCD;
struct time_statistics{
double mean;
double err;
double min;
double max;
void statistics(std::vector<double> v){
double sum = std::accumulate(v.begin(), v.end(), 0.0);
mean = sum / v.size();
std::vector<double> diff(v.size());
std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; });
double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
err = std::sqrt(sq_sum / (v.size()*(v.size() - 1)));
auto result = std::minmax_element(v.begin(), v.end());
min = *result.first;
max = *result.second;
}
};
void header(){
std::cout <<GridLogMessage << " L "<<"\t"<<" Ls "<<"\t"
<<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl;
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
@@ -40,17 +66,21 @@ int main (int argc, char ** argv)
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
int Nloop=10;
int Nloop=100;
int nmu=0;
int maxlat=32;
for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++;
std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl;
std::vector<double> t_time(Nloop);
time_statistics timestat;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
int maxlat=16;
for(int lat=4;lat<=maxlat;lat+=2){
for(int Ls=1;Ls<=16;Ls*=2){
header();
for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=8;Ls*=2){
std::vector<int> latt_size ({lat*mpi_layout[0],
lat*mpi_layout[1],
@@ -58,15 +88,23 @@ int main (int argc, char ** argv)
lat*mpi_layout[3]});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
RealD Nrank = Grid._Nprocessors;
RealD Nnode = Grid.NodeCount();
RealD ppn = Nrank/Nnode;
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
std::vector<Vector<HalfSpinColourVectorD> > xbuf(8);
std::vector<Vector<HalfSpinColourVectorD> > rbuf(8);
int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
for(int mu=0;mu<8;mu++){
xbuf[mu].resize(lat*lat*lat*Ls);
rbuf[mu].resize(lat*lat*lat*Ls);
// std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl;
}
double start=usecond();
for(int i=0;i<Nloop;i++){
double start=usecond();
std::vector<CartesianCommunicator::CommsRequest_t> requests;
@@ -79,7 +117,6 @@ int main (int argc, char ** argv)
int comm_proc=1;
int xmit_to_rank;
int recv_from_rank;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.SendToRecvFromBegin(requests,
(void *)&xbuf[mu][0],
@@ -102,18 +139,24 @@ int main (int argc, char ** argv)
}
Grid.SendToRecvFromComplete(requests);
Grid.Barrier();
double stop=usecond();
t_time[i] = stop-start; // microseconds
}
double stop=usecond();
double dbytes = bytes;
double xbytes = Nloop*dbytes*2.0*ncomm;
timestat.statistics(t_time);
double dbytes = bytes*ppn;
double xbytes = dbytes*2.0*ncomm;
double rbytes = xbytes;
double bidibytes = xbytes+rbytes;
double time = stop-start; // microseconds
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
}
}
@@ -121,25 +164,32 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
header();
for(int lat=4;lat<=maxlat;lat+=2){
for(int Ls=1;Ls<=16;Ls*=2){
for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=8;Ls*=2){
std::vector<int> latt_size ({lat,lat,lat,lat});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
RealD Nrank = Grid._Nprocessors;
RealD Nnode = Grid.NodeCount();
RealD ppn = Nrank/Nnode;
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
std::vector<Vector<HalfSpinColourVectorD> > xbuf(8);
std::vector<Vector<HalfSpinColourVectorD> > rbuf(8);
for(int mu=0;mu<8;mu++){
xbuf[mu].resize(lat*lat*lat*Ls);
rbuf[mu].resize(lat*lat*lat*Ls);
// std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl;
}
int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
double start=usecond();
for(int i=0;i<Nloop;i++){
double start=usecond();
ncomm=0;
for(int mu=0;mu<4;mu++){
@@ -178,30 +228,37 @@ int main (int argc, char ** argv)
}
}
Grid.Barrier();
double stop=usecond();
t_time[i] = stop-start; // microseconds
}
double stop=usecond();
timestat.statistics(t_time);
double dbytes = bytes;
double xbytes = Nloop*dbytes*2.0*ncomm;
double dbytes = bytes*ppn;
double xbytes = dbytes*2.0*ncomm;
double rbytes = xbytes;
double bidibytes = xbytes+rbytes;
double time = stop-start;
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
}
}
Nloop=100;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
header();
for(int lat=4;lat<=maxlat;lat+=2){
for(int Ls=1;Ls<=16;Ls*=2){
for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=8;Ls*=2){
std::vector<int> latt_size ({lat*mpi_layout[0],
lat*mpi_layout[1],
@@ -209,6 +266,9 @@ int main (int argc, char ** argv)
lat*mpi_layout[3]});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
RealD Nrank = Grid._Nprocessors;
RealD Nnode = Grid.NodeCount();
RealD ppn = Nrank/Nnode;
std::vector<HalfSpinColourVectorD *> xbuf(8);
std::vector<HalfSpinColourVectorD *> rbuf(8);
@@ -216,73 +276,86 @@ int main (int argc, char ** argv)
for(int d=0;d<8;d++){
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
}
int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
double start=usecond();
double dbytes;
for(int i=0;i<Nloop;i++){
double start=usecond();
dbytes=0;
ncomm=0;
std::vector<CartesianCommunicator::CommsRequest_t> requests;
ncomm=0;
for(int mu=0;mu<4;mu++){
if (mpi_layout[mu]>1 ) {
ncomm++;
int comm_proc=1;
int xmit_to_rank;
int recv_from_rank;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu][0],
xmit_to_rank,
(void *)&rbuf[mu][0],
recv_from_rank,
bytes);
dbytes+=
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu][0],
xmit_to_rank,
(void *)&rbuf[mu][0],
recv_from_rank,
bytes,mu);
comm_proc = mpi_layout[mu]-1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu+4][0],
xmit_to_rank,
(void *)&rbuf[mu+4][0],
recv_from_rank,
bytes);
dbytes+=
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu+4][0],
xmit_to_rank,
(void *)&rbuf[mu+4][0],
recv_from_rank,
bytes,mu+4);
}
}
Grid.StencilSendToRecvFromComplete(requests);
Grid.StencilSendToRecvFromComplete(requests,0);
Grid.Barrier();
double stop=usecond();
t_time[i] = stop-start; // microseconds
}
double stop=usecond();
double dbytes = bytes;
double xbytes = Nloop*dbytes*2.0*ncomm;
double rbytes = xbytes;
double bidibytes = xbytes+rbytes;
timestat.statistics(t_time);
dbytes=dbytes*ppn;
double xbytes = dbytes*0.5;
double rbytes = dbytes*0.5;
double bidibytes = dbytes;
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
double time = stop-start; // microseconds
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
}
}
Nloop=100;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
header();
for(int lat=4;lat<=maxlat;lat+=2){
for(int Ls=1;Ls<=16;Ls*=2){
for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=8;Ls*=2){
std::vector<int> latt_size ({lat*mpi_layout[0],
lat*mpi_layout[1],
@@ -290,6 +363,9 @@ int main (int argc, char ** argv)
lat*mpi_layout[3]});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
RealD Nrank = Grid._Nprocessors;
RealD Nnode = Grid.NodeCount();
RealD ppn = Nrank/Nnode;
std::vector<HalfSpinColourVectorD *> xbuf(8);
std::vector<HalfSpinColourVectorD *> rbuf(8);
@@ -297,16 +373,18 @@ int main (int argc, char ** argv)
for(int d=0;d<8;d++){
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
}
int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
double start=usecond();
double dbytes;
for(int i=0;i<Nloop;i++){
double start=usecond();
std::vector<CartesianCommunicator::CommsRequest_t> requests;
dbytes=0;
ncomm=0;
for(int mu=0;mu<4;mu++){
@@ -318,44 +396,146 @@ int main (int argc, char ** argv)
int recv_from_rank;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu][0],
xmit_to_rank,
(void *)&rbuf[mu][0],
recv_from_rank,
bytes);
// Grid.StencilSendToRecvFromComplete(requests);
// requests.resize(0);
dbytes+=
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu][0],
xmit_to_rank,
(void *)&rbuf[mu][0],
recv_from_rank,
bytes,mu);
Grid.StencilSendToRecvFromComplete(requests,mu);
requests.resize(0);
comm_proc = mpi_layout[mu]-1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu+4][0],
xmit_to_rank,
(void *)&rbuf[mu+4][0],
recv_from_rank,
bytes);
Grid.StencilSendToRecvFromComplete(requests);
dbytes+=
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu+4][0],
xmit_to_rank,
(void *)&rbuf[mu+4][0],
recv_from_rank,
bytes,mu+4);
Grid.StencilSendToRecvFromComplete(requests,mu+4);
requests.resize(0);
}
}
Grid.Barrier();
double stop=usecond();
t_time[i] = stop-start; // microseconds
}
double stop=usecond();
double dbytes = bytes;
double xbytes = Nloop*dbytes*2.0*ncomm;
double rbytes = xbytes;
double bidibytes = xbytes+rbytes;
timestat.statistics(t_time);
double time = stop-start; // microseconds
dbytes=dbytes*ppn;
double xbytes = dbytes*0.5;
double rbytes = dbytes*0.5;
double bidibytes = dbytes;
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
}
}
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
header();
for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=8;Ls*=2){
std::vector<int> latt_size ({lat*mpi_layout[0],
lat*mpi_layout[1],
lat*mpi_layout[2],
lat*mpi_layout[3]});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
RealD Nrank = Grid._Nprocessors;
RealD Nnode = Grid.NodeCount();
RealD ppn = Nrank/Nnode;
std::vector<HalfSpinColourVectorD *> xbuf(8);
std::vector<HalfSpinColourVectorD *> rbuf(8);
Grid.ShmBufferFreeAll();
for(int d=0;d<8;d++){
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
}
int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
double dbytes;
for(int i=0;i<Nloop;i++){
double start=usecond();
std::vector<CartesianCommunicator::CommsRequest_t> requests;
dbytes=0;
ncomm=0;
parallel_for(int dir=0;dir<8;dir++){
double tbytes;
int mu =dir % 4;
if (mpi_layout[mu]>1 ) {
ncomm++;
int xmit_to_rank;
int recv_from_rank;
if ( dir == mu ) {
int comm_proc=1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
} else {
int comm_proc = mpi_layout[mu]-1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
}
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
(void *)&rbuf[dir][0], recv_from_rank, bytes,dir);
#pragma omp atomic
dbytes+=tbytes;
}
}
Grid.Barrier();
double stop=usecond();
t_time[i] = stop-start; // microseconds
}
timestat.statistics(t_time);
dbytes=dbytes*ppn;
double xbytes = dbytes*0.5;
double rbytes = dbytes*0.5;
double bidibytes = dbytes;
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
}
}
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= All done; Bye Bye"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
Grid_finalize();
}
+207 -99
View File
@@ -1,28 +1,22 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_dwf.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
@@ -48,16 +42,22 @@ typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF;
typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::vector<int> latt4 = GridDefaultLatt();
const int Ls=8;
int Ls=16;
for(int i=0;i<argc;i++)
if(std::string(argv[i]) == "-Ls"){
std::stringstream ss(argv[i+1]); ss >> Ls;
}
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
@@ -71,35 +71,66 @@ int main (int argc, char ** argv)
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
LatticeFermion src (FGrid); random(RNG5,src);
#if 0
src = zero;
{
std::vector<int> origin({0,0,0,latt4[2]-1,0});
SpinColourVectorF tmp;
tmp=zero;
tmp()(0)(0)=Complex(-2.0,0.0);
std::cout << " source site 0 " << tmp<<std::endl;
pokeSite(tmp,src,origin);
}
#else
RealD N2 = 1.0/::sqrt(norm2(src));
src = src*N2;
#endif
LatticeFermion result(FGrid); result=zero;
LatticeFermion ref(FGrid); ref=zero;
LatticeFermion tmp(FGrid);
LatticeFermion err(FGrid);
std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
LatticeGaugeField Umu(UGrid);
random(RNG4,Umu);
LatticeGaugeField Umu5d(FGrid);
SU3::HotConfiguration(RNG4,Umu);
std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
#if 0
Umu=1.0;
for(int mu=0;mu<Nd;mu++){
LatticeColourMatrix ttmp(UGrid);
ttmp = PeekIndex<LorentzIndex>(Umu,mu);
// if (mu !=2 ) ttmp = 0;
// ttmp = ttmp* pow(10.0,mu);
PokeIndex<LorentzIndex>(Umu,ttmp,mu);
}
std::cout << GridLogMessage << "Forced to diagonal " << std::endl;
#endif
////////////////////////////////////
// Naive wilson implementation
////////////////////////////////////
// replicate across fifth dimension
LatticeGaugeField Umu5d(FGrid);
std::vector<LatticeColourMatrix> U(4,FGrid);
for(int ss=0;ss<Umu._grid->oSites();ss++){
for(int s=0;s<Ls;s++){
Umu5d._odata[Ls*ss+s] = Umu._odata[ss];
}
}
////////////////////////////////////
// Naive wilson implementation
////////////////////////////////////
std::vector<LatticeColourMatrix> U(4,FGrid);
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
}
std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl;
if (1)
{
@@ -120,8 +151,7 @@ int main (int argc, char ** argv)
RealD M5 =1.8;
RealD NP = UGrid->_Nprocessors;
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
RealD NN = UGrid->NodeCount();
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
@@ -131,15 +161,22 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
int ncall =100;
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
int ncall =500;
if (1) {
FGrid->Barrier();
Dw.ZeroCounters();
Dw.Dhop(src,result,0);
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
@@ -153,16 +190,55 @@ int main (int argc, char ** argv)
double flops=1344*volume*ncall;
std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
// std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
// std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
/*
if(( norm2(err)>1.0e-4) ) {
std::cout << "RESULT\n " << result<<std::endl;
std::cout << "REF \n " << ref <<std::endl;
std::cout << "ERR \n " << err <<std::endl;
FGrid->Barrier();
exit(-1);
}
*/
assert (norm2(err)< 1.0e-4 );
Dw.Report();
}
DomainWallFermionRL DwH(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
if (1) {
FGrid->Barrier();
DwH.ZeroCounters();
DwH.Dhop(src,result,0);
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
DwH.Dhop(src,result,0);
__SSC_STOP;
}
double t1=usecond();
FGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
assert (norm2(err)< 1.0e-3 );
DwH.Report();
}
if (1)
{
@@ -171,6 +247,10 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
@@ -182,21 +262,13 @@ int main (int argc, char ** argv)
LatticeFermion sresult(sFGrid);
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
for(int x=0;x<latt4[0];x++){
for(int y=0;y<latt4[1];y++){
for(int z=0;z<latt4[2];z++){
for(int t=0;t<latt4[3];t++){
for(int s=0;s<Ls;s++){
std::vector<int> site({s,x,y,z,t});
SpinColourVector tmp;
peekSite(tmp,src,site);
pokeSite(tmp,ssrc,site);
}}}}}
localConvert(src,ssrc);
std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
FGrid->Barrier();
double t0=usecond();
sDw.Dhop(ssrc,sresult,0);
sDw.ZeroCounters();
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
sDw.Dhop(ssrc,sresult,0);
@@ -210,46 +282,53 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
// std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
sDw.Report();
if(0){
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
sDw.Dhop(ssrc,sresult,0);
PerformanceCounter Counter(i);
Counter.Start();
sDw.Dhop(ssrc,sresult,0);
Counter.Stop();
Counter.Report();
}
}
std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
RealD sum=0;
for(int x=0;x<latt4[0];x++){
for(int y=0;y<latt4[1];y++){
for(int z=0;z<latt4[2];z++){
for(int t=0;t<latt4[3];t++){
for(int s=0;s<Ls;s++){
std::vector<int> site({s,x,y,z,t});
SpinColourVector normal, simd;
peekSite(normal,result,site);
peekSite(simd,sresult,site);
sum=sum+norm2(normal-simd);
if (norm2(normal-simd) > 1.0e-6 ) {
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl;
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd "<<simd<<std::endl;
}
}}}}}
std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl;
assert (sum< 1.0e-4 );
err=zero;
localConvert(sresult,err);
err = err - ref;
sum = norm2(err);
std::cout<<GridLogMessage<<" difference between normal ref and simd is "<<sum<<std::endl;
if(sum > 1.0e-4 ){
std::cout<< "sD REF\n " <<ref << std::endl;
std::cout<< "sD ERR \n " <<err <<std::endl;
}
// assert(sum < 1.0e-4);
if (1) {
err=zero;
localConvert(sresult,err);
err = err - result;
sum = norm2(err);
std::cout<<GridLogMessage<<" difference between normal result and simd is "<<sum<<std::endl;
if(sum > 1.0e-4 ){
std::cout<< "sD REF\n " <<result << std::endl;
std::cout<< "sD ERR \n " << err <<std::endl;
}
assert(sum < 1.0e-4);
if(1){
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Benchmarking WilsonFermion5D<DomainWallVec5dImplR>::DhopEO "<<std::endl;
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric )
std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)
std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm )
std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
LatticeFermion sr_eo(sFGrid);
LatticeFermion ssrc_e (sFrbGrid);
LatticeFermion ssrc_o (sFrbGrid);
LatticeFermion sr_e (sFrbGrid);
@@ -257,39 +336,30 @@ int main (int argc, char ** argv)
pickCheckerboard(Even,ssrc_e,ssrc);
pickCheckerboard(Odd,ssrc_o,ssrc);
setCheckerboard(sr_eo,ssrc_o);
setCheckerboard(sr_eo,ssrc_e);
// setCheckerboard(sr_eo,ssrc_o);
// setCheckerboard(sr_eo,ssrc_e);
sr_e = zero;
sr_o = zero;
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Benchmarking WilsonFermion5D<DomainWallVec5dImplR>::DhopEO "<<std::endl;
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
FGrid->Barrier();
sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
sDw.ZeroCounters();
sDw.stat.init("DhopEO");
// sDw.stat.init("DhopEO");
double t0=usecond();
for (int i = 0; i < ncall; i++) {
sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
}
double t1=usecond();
FGrid->Barrier();
sDw.stat.print();
// sDw.stat.print();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume*ncall)/2;
std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "sDeo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "sDeo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
sDw.Report();
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
@@ -298,24 +368,43 @@ int main (int argc, char ** argv)
pickCheckerboard(Even,ssrc_e,sresult);
pickCheckerboard(Odd ,ssrc_o,sresult);
ssrc_e = ssrc_e - sr_e;
RealD error = norm2(ssrc_e);
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<< " vec nrm"<<norm2(sr_e) <<std::endl;
ssrc_o = ssrc_o - sr_o;
ssrc_o = ssrc_o - sr_o;
error+= norm2(ssrc_o);
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<< " vec nrm"<<norm2(sr_o) <<std::endl;
if(error>1.0e-4) {
if(( error>1.0e-4) ) {
setCheckerboard(ssrc,ssrc_o);
setCheckerboard(ssrc,ssrc_e);
std::cout<< ssrc << std::endl;
std::cout<< "DIFF\n " <<ssrc << std::endl;
setCheckerboard(ssrc,sr_o);
setCheckerboard(ssrc,sr_e);
std::cout<< "CBRESULT\n " <<ssrc << std::endl;
std::cout<< "RESULT\n " <<sresult<< std::endl;
}
assert(error<1.0e-4);
}
if(0){
std::cout << "Single cache warm call to sDw.Dhop " <<std::endl;
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
sDw.Dhop(ssrc,sresult,0);
PerformanceCounter Counter(i);
Counter.Start();
sDw.Dhop(ssrc,sresult,0);
Counter.Stop();
Counter.Report();
}
}
}
if (1)
{ // Naive wilson dag implementation
ref = zero;
@@ -324,25 +413,30 @@ int main (int argc, char ** argv)
// ref = src - Gamma(Gamma::Algebra::GammaX)* src ; // 1+gamma_x
tmp = U[mu]*Cshift(src,mu+1,1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
}
tmp =adj(U[mu])*src;
tmp =Cshift(tmp,mu+1,-1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
}
}
ref = -0.5*ref;
}
// dump=1;
Dw.Dhop(src,result,1);
std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl;
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "norm dag result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm dag ref "<< norm2(ref)<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
assert(norm2(err)<1.0e-4);
std::cout<<GridLogMessage << "norm dag diff "<< norm2(err)<<std::endl;
if((norm2(err)>1.0e-4)){
std::cout<< "DAG RESULT\n " <<ref << std::endl;
std::cout<< "DAG sRESULT\n " <<result << std::endl;
std::cout<< "DAG ERR \n " << err <<std::endl;
}
LatticeFermion src_e (FrbGrid);
LatticeFermion src_o (FrbGrid);
LatticeFermion r_e (FrbGrid);
@@ -350,18 +444,24 @@ int main (int argc, char ** argv)
LatticeFermion r_eo (FGrid);
std::cout<<GridLogMessage << "Calling Deo and Doe and assert Deo+Doe == Dunprec"<<std::endl;
std::cout<<GridLogMessage << "Calling Deo and Doe and //assert Deo+Doe == Dunprec"<<std::endl;
pickCheckerboard(Even,src_e,src);
pickCheckerboard(Odd,src_o,src);
std::cout<<GridLogMessage << "src_e"<<norm2(src_e)<<std::endl;
std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
// S-direction is INNERMOST and takes no part in the parity.
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO "<<std::endl;
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
@@ -369,6 +469,7 @@ int main (int argc, char ** argv)
{
Dw.ZeroCounters();
FGrid->Barrier();
Dw.DhopEO(src_o,r_e,DaggerNo);
double t0=usecond();
for(int i=0;i<ncall;i++){
Dw.DhopEO(src_o,r_e,DaggerNo);
@@ -381,6 +482,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
Dw.Report();
}
Dw.DhopEO(src_o,r_e,DaggerNo);
@@ -396,14 +498,20 @@ int main (int argc, char ** argv)
err = r_eo-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
assert(norm2(err)<1.0e-4);
if((norm2(err)>1.0e-4)){
std::cout<< "Deo RESULT\n " <<r_eo << std::endl;
std::cout<< "Deo REF\n " <<result << std::endl;
std::cout<< "Deo ERR \n " << err <<std::endl;
}
pickCheckerboard(Even,src_e,err);
pickCheckerboard(Odd,src_o,err);
std::cout<<GridLogMessage << "norm diff even "<< norm2(src_e)<<std::endl;
std::cout<<GridLogMessage << "norm diff odd "<< norm2(src_o)<<std::endl;
assert(norm2(src_e)<1.0e-4);
assert(norm2(src_o)<1.0e-4);
Grid_finalize();
exit(0);
}
+190
View File
@@ -0,0 +1,190 @@
#include <Grid/Grid.h>
#include <sstream>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
template<class d>
struct scal {
d internal;
};
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT
};
typedef typename GparityDomainWallFermionF::FermionField GparityLatticeFermionF;
typedef typename GparityDomainWallFermionD::FermionField GparityLatticeFermionD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
int Ls=16;
for(int i=0;i<argc;i++)
if(std::string(argv[i]) == "-Ls"){
std::stringstream ss(argv[i+1]); ss >> Ls;
}
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::cout<<GridLogMessage << "Ls = " << Ls << std::endl;
std::vector<int> latt4 = GridDefaultLatt();
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
GparityLatticeFermionF src (FGrid); random(RNG5,src);
RealD N2 = 1.0/::sqrt(norm2(src));
src = src*N2;
GparityLatticeFermionF result(FGrid); result=zero;
GparityLatticeFermionF ref(FGrid); ref=zero;
GparityLatticeFermionF tmp(FGrid);
GparityLatticeFermionF err(FGrid);
std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
LatticeGaugeFieldF Umu(UGrid);
SU3::HotConfiguration(RNG4,Umu);
std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
RealD mass=0.1;
RealD M5 =1.8;
RealD NP = UGrid->_Nprocessors;
RealD NN = UGrid->NodeCount();
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermion::Dhop "<<std::endl;
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplexF::Nsimd()<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
std::cout << GridLogMessage<< "* SINGLE/SINGLE"<<std::endl;
GparityDomainWallFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
int ncall =1000;
if (1) {
FGrid->Barrier();
Dw.ZeroCounters();
Dw.Dhop(src,result,0);
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
Dw.Dhop(src,result,0);
__SSC_STOP;
}
double t1=usecond();
FGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=2*1344*volume*ncall;
std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
// std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
// std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
Dw.Report();
}
std::cout << GridLogMessage<< "* SINGLE/HALF"<<std::endl;
GparityDomainWallFermionFH DwH(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
if (1) {
FGrid->Barrier();
DwH.ZeroCounters();
DwH.Dhop(src,result,0);
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
DwH.Dhop(src,result,0);
__SSC_STOP;
}
double t1=usecond();
FGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=2*1344*volume*ncall;
std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
DwH.Report();
}
GridCartesian * UGrid_d = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid_d = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_d);
GridCartesian * FGrid_d = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_d);
GridRedBlackCartesian * FrbGrid_d = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_d);
std::cout << GridLogMessage<< "* DOUBLE/DOUBLE"<<std::endl;
GparityLatticeFermionD src_d(FGrid_d);
precisionChange(src_d,src);
LatticeGaugeFieldD Umu_d(UGrid_d);
precisionChange(Umu_d,Umu);
GparityLatticeFermionD result_d(FGrid_d);
GparityDomainWallFermionD DwD(Umu_d,*FGrid_d,*FrbGrid_d,*UGrid_d,*UrbGrid_d,mass,M5);
if (1) {
FGrid_d->Barrier();
DwD.ZeroCounters();
DwD.Dhop(src_d,result_d,0);
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
DwD.Dhop(src_d,result_d,0);
__SSC_STOP;
}
double t1=usecond();
FGrid_d->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=2*1344*volume*ncall;
std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
// std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
// std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
DwD.Report();
}
Grid_finalize();
}
+3 -3
View File
@@ -66,7 +66,8 @@ int main (int argc, char ** argv)
Vec tsum; tsum = zero;
GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
GridParallelRNG pRNG(&Grid);
pRNG.SeedFixedIntegers(std::vector<int>({56,17,89,101}));
std::vector<double> stop(threads);
Vector<Vec> sum(threads);
@@ -77,8 +78,7 @@ int main (int argc, char ** argv)
}
double start=usecond();
PARALLEL_FOR_LOOP
for(int t=0;t<threads;t++){
parallel_for(int t=0;t<threads;t++){
sum[t] = x[t]._odata[0];
for(int i=0;i<Nloop;i++){
+30 -30
View File
@@ -55,21 +55,21 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
uint64_t lmax=44;
#define NLOOP (1*lmax*lmax*lmax*lmax/vol)
for(int lat=4;lat<=lmax;lat+=4){
uint64_t lmax=96;
#define NLOOP (10*lmax*lmax*lmax*lmax/vol)
for(int lat=8;lat<=lmax;lat+=8){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
uint64_t Nloop=NLOOP;
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
LatticeVec y(&Grid); //random(pRNG,y);
LatticeVec z(&Grid);// random(pRNG,z);
LatticeVec x(&Grid);// random(pRNG,x);
LatticeVec y(&Grid);// random(pRNG,y);
double a=2.0;
@@ -83,7 +83,7 @@ int main (int argc, char ** argv)
double time = (stop-start)/Nloop*1000;
double flops=vol*Nvec*2;// mul,add
double bytes=3*vol*Nvec*sizeof(Real);
double bytes=3.0*vol*Nvec*sizeof(Real);
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
}
@@ -94,17 +94,17 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=4;lat<=lmax;lat+=4){
for(int lat=8;lat<=lmax;lat+=8){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
LatticeVec y(&Grid); //random(pRNG,y);
LatticeVec z(&Grid);// random(pRNG,z);
LatticeVec x(&Grid);// random(pRNG,x);
LatticeVec y(&Grid);// random(pRNG,y);
double a=2.0;
uint64_t Nloop=NLOOP;
@@ -119,7 +119,7 @@ int main (int argc, char ** argv)
double time = (stop-start)/Nloop*1000;
double flops=vol*Nvec*2;// mul,add
double bytes=3*vol*Nvec*sizeof(Real);
double bytes=3.0*vol*Nvec*sizeof(Real);
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
}
@@ -129,20 +129,20 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
for(int lat=4;lat<=lmax;lat+=4){
for(int lat=8;lat<=lmax;lat+=8){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
uint64_t Nloop=NLOOP;
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
LatticeVec y(&Grid); //random(pRNG,y);
LatticeVec z(&Grid);// random(pRNG,z);
LatticeVec x(&Grid);// random(pRNG,x);
LatticeVec y(&Grid);// random(pRNG,y);
RealD a=2.0;
@@ -154,7 +154,7 @@ int main (int argc, char ** argv)
double stop=usecond();
double time = (stop-start)/Nloop*1000;
double bytes=2*vol*Nvec*sizeof(Real);
double bytes=2.0*vol*Nvec*sizeof(Real);
double flops=vol*Nvec*1;// mul
std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
@@ -166,17 +166,17 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=4;lat<=lmax;lat+=4){
for(int lat=8;lat<=lmax;lat+=8){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
uint64_t Nloop=NLOOP;
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
LatticeVec y(&Grid); //random(pRNG,y);
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
LatticeVec z(&Grid);// random(pRNG,z);
LatticeVec x(&Grid);// random(pRNG,x);
LatticeVec y(&Grid);// random(pRNG,y);
RealD a=2.0;
Real nn;
double start=usecond();
@@ -187,7 +187,7 @@ int main (int argc, char ** argv)
double stop=usecond();
double time = (stop-start)/Nloop*1000;
double bytes=vol*Nvec*sizeof(Real);
double bytes=1.0*vol*Nvec*sizeof(Real);
double flops=vol*Nvec*2;// mul,add
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"<<(stop-start)/1000./1000.<< "\t\t " <<std::endl;
+134
View File
@@ -0,0 +1,134 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_staggered.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REALF"<< sizeof(RealF)<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REALD"<< sizeof(RealD)<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REAL"<< sizeof(Real)<<std::endl;
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid);
pRNG.SeedFixedIntegers(seeds);
// pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
typename ImprovedStaggeredFermionR::ImplParams params;
FermionField src (&Grid); random(pRNG,src);
FermionField result(&Grid); result=zero;
FermionField ref(&Grid); ref=zero;
FermionField tmp(&Grid); tmp=zero;
FermionField err(&Grid); tmp=zero;
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
std::vector<LatticeColourMatrix> U(4,&Grid);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
// Only one non-zero (y)
#if 0
Umu=zero;
Complex cone(1.0,0.0);
for(int nn=0;nn<Nd;nn++){
random(pRNG,U[nn]);
if(1) {
if (nn!=2) { U[nn]=zero; std::cout<<GridLogMessage << "zeroing gauge field in dir "<<nn<<std::endl; }
// else { U[nn]= cone;std::cout<<GridLogMessage << "unit gauge field in dir "<<nn<<std::endl; }
else { std::cout<<GridLogMessage << "random gauge field in dir "<<nn<<std::endl; }
}
PokeIndex<LorentzIndex>(Umu,U[nn],nn);
}
#endif
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
}
ref = zero;
/*
{ // Naive wilson implementation
ref = zero;
for(int mu=0;mu<Nd;mu++){
// ref = src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
tmp = U[mu]*Cshift(src,mu,1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
}
tmp =adj(U[mu])*src;
tmp =Cshift(tmp,mu,-1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
}
}
}
ref = -0.5*ref;
*/
RealD mass=0.1;
RealD c1=9.0/8.0;
RealD c2=-1.0/24.0;
RealD u0=1.0;
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0,params);
std::cout<<GridLogMessage << "Calling Ds"<<std::endl;
int ncall=1000;
double t0=usecond();
for(int i=0;i<ncall;i++){
Ds.Dhop(src,result,0);
}
double t1=usecond();
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
std::cout<<GridLogMessage << "Called Ds"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
Grid_finalize();
}
+31 -30
View File
@@ -35,13 +35,14 @@ using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
#define LMAX (64)
int Nloop=1000;
int64_t Nloop=20;
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
int threads = GridThread::GetThreads();
int64_t threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
@@ -50,19 +51,19 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=2;lat<=32;lat+=2){
for(int lat=2;lat<=LMAX;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
LatticeColourMatrix z(&Grid);// random(pRNG,z);
LatticeColourMatrix x(&Grid);// random(pRNG,x);
LatticeColourMatrix y(&Grid);// random(pRNG,y);
LatticeColourMatrix z(&Grid); random(pRNG,z);
LatticeColourMatrix x(&Grid); random(pRNG,x);
LatticeColourMatrix y(&Grid); random(pRNG,y);
double start=usecond();
for(int i=0;i<Nloop;i++){
for(int64_t i=0;i<Nloop;i++){
x=x*y;
}
double stop=usecond();
@@ -82,20 +83,20 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=2;lat<=32;lat+=2){
for(int lat=2;lat<=LMAX;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
LatticeColourMatrix z(&Grid); //random(pRNG,z);
LatticeColourMatrix x(&Grid); //random(pRNG,x);
LatticeColourMatrix y(&Grid); //random(pRNG,y);
LatticeColourMatrix z(&Grid); random(pRNG,z);
LatticeColourMatrix x(&Grid); random(pRNG,x);
LatticeColourMatrix y(&Grid); random(pRNG,y);
double start=usecond();
for(int i=0;i<Nloop;i++){
for(int64_t i=0;i<Nloop;i++){
z=x*y;
}
double stop=usecond();
@@ -113,20 +114,20 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=2;lat<=32;lat+=2){
for(int lat=2;lat<=LMAX;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
LatticeColourMatrix z(&Grid); //random(pRNG,z);
LatticeColourMatrix x(&Grid); //random(pRNG,x);
LatticeColourMatrix y(&Grid); //random(pRNG,y);
LatticeColourMatrix z(&Grid); random(pRNG,z);
LatticeColourMatrix x(&Grid); random(pRNG,x);
LatticeColourMatrix y(&Grid); random(pRNG,y);
double start=usecond();
for(int i=0;i<Nloop;i++){
for(int64_t i=0;i<Nloop;i++){
mult(z,x,y);
}
double stop=usecond();
@@ -144,20 +145,20 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=2;lat<=32;lat+=2){
for(int lat=2;lat<=LMAX;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
LatticeColourMatrix z(&Grid); //random(pRNG,z);
LatticeColourMatrix x(&Grid); //random(pRNG,x);
LatticeColourMatrix y(&Grid); //random(pRNG,y);
LatticeColourMatrix z(&Grid); random(pRNG,z);
LatticeColourMatrix x(&Grid); random(pRNG,x);
LatticeColourMatrix y(&Grid); random(pRNG,y);
double start=usecond();
for(int i=0;i<Nloop;i++){
for(int64_t i=0;i<Nloop;i++){
mac(z,x,y);
}
double stop=usecond();
+2 -2
View File
@@ -58,7 +58,7 @@ int main (int argc, char ** argv)
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
@@ -69,7 +69,7 @@ int main (int argc, char ** argv)
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid);
pRNG.SeedFixedIntegers(seeds);
// pRNG.SeedRandomDevice();
// pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
LatticeFermion src (&Grid); random(pRNG,src);
LatticeFermion result(&Grid); result=zero;
+1 -1
View File
@@ -93,7 +93,7 @@ int main (int argc, char ** argv)
std::cout << latt_size.back() << "\t\t";
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
+5 -9
View File
@@ -1,11 +1,7 @@
include Make.inc
simple: simple_su3_test.o simple_su3_expr.o simple_simd_test.o
EXTRA_LIBRARIES = libsimple_su3_test.a libsimple_su3_expr.a libsimple_simd_test.a
libsimple_su3_test_a_SOURCES = simple_su3_test.cc
libsimple_su3_expr_a_SOURCES = simple_su3_expr.cc
libsimple_simd_test_a_SOURCES = simple_simd_test.cc
bench-local: all
./Benchmark_su3
./Benchmark_memory_bandwidth
./Benchmark_wilson
./Benchmark_dwf --dslash-unroll
+1 -1
View File
@@ -1,6 +1,6 @@
#!/usr/bin/env bash
EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2'
EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2'
echo "-- deploying Eigen source..."
wget ${EIGEN_URL} --no-check-certificate
+182 -59
View File
@@ -1,16 +1,23 @@
AC_PREREQ([2.63])
AC_INIT([Grid], [0.6.0], [https://github.com/paboyle/Grid], [Grid])
AC_INIT([Grid], [0.7.0], [https://github.com/paboyle/Grid], [Grid])
AC_CANONICAL_BUILD
AC_CANONICAL_HOST
AC_CANONICAL_TARGET
AM_INIT_AUTOMAKE(subdir-objects)
AM_INIT_AUTOMAKE([subdir-objects 1.13])
AM_EXTRA_RECURSIVE_TARGETS([tests bench])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_SRCDIR([lib/Grid.h])
AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
############### Checks for programs
################ Get git info
#AC_REVISION([m4_esyscmd_s([./scripts/configure.commit])])
################ Set flags
# do not move!
CXXFLAGS="-O3 $CXXFLAGS"
############### Checks for programs
AC_PROG_CXX
AC_PROG_RANLIB
@@ -24,12 +31,14 @@ AX_GXX_VERSION
AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
[version of g++ that will compile the code])
############### Checks for typedefs, structures, and compiler characteristics
AC_TYPE_SIZE_T
AC_TYPE_UINT32_T
AC_TYPE_UINT64_T
############### OpenMP
############### OpenMP
AC_OPENMP
ac_openmp=no
if test "${OPENMP_CXXFLAGS}X" != "X"; then
@@ -45,9 +54,14 @@ AC_CHECK_HEADERS(malloc/malloc.h)
AC_CHECK_HEADERS(malloc.h)
AC_CHECK_HEADERS(endian.h)
AC_CHECK_HEADERS(execinfo.h)
AC_CHECK_HEADERS(numaif.h)
AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
############## Standard libraries
AC_CHECK_LIB([m],[cos])
AC_CHECK_LIB([stdc++],[abort])
############### GMP and MPFR
AC_ARG_WITH([gmp],
[AS_HELP_STRING([--with-gmp=prefix],
@@ -60,16 +74,23 @@ AC_ARG_WITH([mpfr],
[AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
############### FFTW3
AC_ARG_WITH([fftw],
############### FFTW3
AC_ARG_WITH([fftw],
[AS_HELP_STRING([--with-fftw=prefix],
[try this for a non-standard install prefix of the FFTW3 library])],
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
############### lapack
############### LIME
AC_ARG_WITH([lime],
[AS_HELP_STRING([--with-lime=prefix],
[try this for a non-standard install prefix of the LIME library])],
[AM_CXXFLAGS="-I$with_lime/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_lime/lib $AM_LDFLAGS"])
############### lapack
AC_ARG_ENABLE([lapack],
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
[ac_LAPACK=${enable_lapack}], [ac_LAPACK=no])
case ${ac_LAPACK} in
@@ -83,6 +104,18 @@ case ${ac_LAPACK} in
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
esac
############### FP16 conversions
AC_ARG_ENABLE([sfw-fp16],
[AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])],
[ac_SFW_FP16=${enable_sfw_fp16}], [ac_SFW_FP16=yes])
case ${ac_SFW_FP16} in
yes)
AC_DEFINE([SFW_FP16],[1],[software conversion to fp16]);;
no);;
*)
AC_MSG_ERROR(["SFW FP16 option not supported ${ac_SFW_FP16}"]);;
esac
############### MKL
AC_ARG_ENABLE([mkl],
[AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])],
@@ -108,7 +141,7 @@ AC_ARG_WITH([hdf5],
############### first-touch
AC_ARG_ENABLE([numa],
[AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
[AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
[ac_NUMA=${enable_NUMA}],[ac_NUMA=no])
case ${ac_NUMA} in
@@ -134,8 +167,8 @@ if test "${ac_MKL}x" != "nox"; then
fi
AC_SEARCH_LIBS([__gmpf_init], [gmp],
[AC_SEARCH_LIBS([mpfr_init], [mpfr],
[AC_DEFINE([HAVE_LIBMPFR], [1],
[AC_SEARCH_LIBS([mpfr_init], [mpfr],
[AC_DEFINE([HAVE_LIBMPFR], [1],
[Define to 1 if you have the `MPFR' library])]
[have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])]
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])]
@@ -144,7 +177,7 @@ AC_SEARCH_LIBS([__gmpf_init], [gmp],
if test "${ac_LAPACK}x" != "nox"; then
AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [],
[AC_MSG_ERROR("LAPACK enabled but library not found")])
fi
fi
AC_SEARCH_LIBS([fftw_execute], [fftw3],
[AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [],
@@ -152,6 +185,23 @@ AC_SEARCH_LIBS([fftw_execute], [fftw3],
[AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])]
[have_fftw=true])
AC_SEARCH_LIBS([limeCreateReader], [lime],
[AC_DEFINE([HAVE_LIME], [1], [Define to 1 if you have the `LIME' library])]
[have_lime=true],
[AC_MSG_WARN(C-LIME library was not found in your system.
In order to use ILGG file format please install or provide the correct path to your installation
Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)])
AC_SEARCH_LIBS([crc32], [z],
[AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the `LIBZ' library])]
[have_zlib=true] [LIBS="${LIBS} -lz"],
[AC_MSG_ERROR(zlib library was not found in your system.)])
AC_SEARCH_LIBS([move_pages], [numa],
[AC_DEFINE([HAVE_LIBNUMA], [1], [Define to 1 if you have the `LIBNUMA' library])]
[have_libnuma=true] [LIBS="${LIBS} -lnuma"],
[AC_MSG_WARN(libnuma library was not found in your system. Some optimisations will not apply)])
AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp],
[AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])]
[have_hdf5=true]
@@ -176,19 +226,26 @@ case ${ax_cv_cxx_compiler_vendor} in
case ${ac_SIMD} in
SSE4)
AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
SIMD_FLAGS='-msse4.2';;
case ${ac_SFW_FP16} in
yes)
SIMD_FLAGS='-msse4.2';;
no)
SIMD_FLAGS='-msse4.2 -mf16c';;
*)
AC_MSG_ERROR(["SFW_FP16 must be either yes or no value ${ac_SFW_FP16} "]);;
esac;;
AVX)
AC_DEFINE([AVX1],[1],[AVX intrinsics])
SIMD_FLAGS='-mavx';;
SIMD_FLAGS='-mavx -mf16c';;
AVXFMA4)
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
SIMD_FLAGS='-mavx -mfma4';;
SIMD_FLAGS='-mavx -mfma4 -mf16c';;
AVXFMA)
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3])
SIMD_FLAGS='-mavx -mfma';;
SIMD_FLAGS='-mavx -mfma -mf16c';;
AVX2)
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
SIMD_FLAGS='-mavx2 -mfma';;
SIMD_FLAGS='-mavx2 -mfma -mf16c';;
AVX512)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
@@ -197,6 +254,7 @@ case ${ax_cv_cxx_compiler_vendor} in
SIMD_FLAGS='';;
KNL)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
AC_DEFINE([KNL],[1],[Knights landing processor])
SIMD_FLAGS='-march=knl';;
GEN)
AC_DEFINE([GEN],[1],[generic vector code])
@@ -204,6 +262,9 @@ case ${ax_cv_cxx_compiler_vendor} in
[generic SIMD vector width (in bytes)])
SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)"
SIMD_FLAGS='';;
NEONv8)
AC_DEFINE([NEONV8],[1],[ARMv8 NEON])
SIMD_FLAGS='-march=armv8-a';;
QPX|BGQ)
AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q])
SIMD_FLAGS='';;
@@ -232,6 +293,7 @@ case ${ax_cv_cxx_compiler_vendor} in
SIMD_FLAGS='';;
KNL)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing])
AC_DEFINE([KNL],[1],[Knights landing processor])
SIMD_FLAGS='-xmic-avx512';;
GEN)
AC_DEFINE([GEN],[1],[generic vector code])
@@ -269,8 +331,41 @@ case ${ac_PRECISION} in
double)
AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
;;
*)
AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]);
;;
esac
###################### Shared memory allocation technique under MPI3
AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmget|shmopen|hugetlbfs],
[Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=shmopen])
case ${ac_SHM} in
shmget)
AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] )
;;
shmopen)
AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] )
;;
hugetlbfs)
AC_DEFINE([GRID_MPI3_SHMMMAP],[1],[GRID_MPI3_SHMMMAP] )
;;
*)
AC_MSG_ERROR([${ac_SHM} unsupported --enable-shm option]);
;;
esac
###################### Shared base path for SHMMMAP
AC_ARG_ENABLE([shmpath],[AC_HELP_STRING([--enable-shmpath=path],
[Select SHM mmap base path for hugetlbfs])],
[ac_SHMPATH=${enable_shmpath}],
[ac_SHMPATH=/var/lib/hugetlbfs/pagesize-2MB/])
AC_DEFINE_UNQUOTED([GRID_SHM_PATH],["$ac_SHMPATH"],[Path to a hugetlbfs filesystem for MMAPing])
############### communication type selection
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem],
[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
@@ -280,14 +375,14 @@ case ${ac_COMMS} in
AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] )
comms_type='none'
;;
mpi3l*)
AC_DEFINE([GRID_COMMS_MPI3L],[1],[GRID_COMMS_MPI3L] )
comms_type='mpi3l'
;;
mpi3*)
AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] )
comms_type='mpi3'
;;
mpit)
AC_DEFINE([GRID_COMMS_MPIT],[1],[GRID_COMMS_MPIT] )
comms_type='mpit'
;;
mpi*)
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
comms_type='mpi'
@@ -297,7 +392,7 @@ case ${ac_COMMS} in
comms_type='shmem'
;;
*)
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
;;
esac
case ${ac_COMMS} in
@@ -315,13 +410,13 @@ esac
AM_CONDITIONAL(BUILD_COMMS_SHMEM, [ test "${comms_type}X" == "shmemX" ])
AM_CONDITIONAL(BUILD_COMMS_MPI, [ test "${comms_type}X" == "mpiX" ])
AM_CONDITIONAL(BUILD_COMMS_MPI3, [ test "${comms_type}X" == "mpi3X" ] )
AM_CONDITIONAL(BUILD_COMMS_MPI3L, [ test "${comms_type}X" == "mpi3lX" ] )
AM_CONDITIONAL(BUILD_COMMS_MPIT, [ test "${comms_type}X" == "mpitX" ] )
AM_CONDITIONAL(BUILD_COMMS_NONE, [ test "${comms_type}X" == "noneX" ])
############### RNG selection
AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937|sitmo],\
[Select Random Number Generator to be used])],\
[ac_RNG=${enable_rng}],[ac_RNG=ranlux48])
[ac_RNG=${enable_rng}],[ac_RNG=sitmo])
case ${ac_RNG} in
ranlux48)
@@ -334,7 +429,7 @@ case ${ac_RNG} in
AC_DEFINE([RNG_SITMO],[1],[RNG_SITMO] )
;;
*)
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
;;
esac
@@ -351,7 +446,7 @@ case ${ac_TIMERS} in
AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
;;
*)
AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
;;
esac
@@ -363,7 +458,7 @@ case ${ac_CHROMA} in
yes|no)
;;
*)
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
;;
esac
@@ -384,12 +479,67 @@ DX_INIT_DOXYGEN([$PACKAGE_NAME], [doxygen.cfg])
############### Ouput
cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd}
GRID_CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
GRID_LDFLAGS="$AM_LDFLAGS $LDFLAGS"
GRID_LIBS=$LIBS
GRID_SHORT_SHA=`git rev-parse --short HEAD`
GRID_SHA=`git rev-parse HEAD`
GRID_BRANCH=`git rev-parse --abbrev-ref HEAD`
AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS"
AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS"
AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS"
AC_SUBST([AM_CFLAGS])
AC_SUBST([AM_CXXFLAGS])
AC_SUBST([AM_LDFLAGS])
AC_SUBST([GRID_CXXFLAGS])
AC_SUBST([GRID_LDFLAGS])
AC_SUBST([GRID_LIBS])
AC_SUBST([GRID_SHA])
AC_SUBST([GRID_BRANCH])
git_commit=`cd $srcdir && ./scripts/configure.commit`
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Summary of configuration for $PACKAGE v$VERSION
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
----- GIT VERSION -------------------------------------
$git_commit
----- PLATFORM ----------------------------------------
architecture (build) : $build_cpu
os (build) : $build_os
architecture (target) : $target_cpu
os (target) : $target_os
compiler vendor : ${ax_cv_cxx_compiler_vendor}
compiler version : ${ax_cv_gxx_version}
----- BUILD OPTIONS -----------------------------------
SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
Threading : ${ac_openmp}
Communications type : ${comms_type}
Shared memory allocator : ${ac_SHM}
Shared memory mmap path : ${ac_SHMPATH}
Default precision : ${ac_PRECISION}
Software FP16 conversion : ${ac_SFW_FP16}
RNG choice : ${ac_RNG}
GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
LAPACK : ${ac_LAPACK}
FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
LIME (ILDG support) : `if test "x$have_lime" = xtrue; then echo yes; else echo no; fi`
HDF5 : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi`
build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
----- BUILD FLAGS -------------------------------------
CXXFLAGS:
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
LDFLAGS:
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
LIBS:
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
-------------------------------------------------------" > grid.configure.summary
GRID_SUMMARY="`cat grid.configure.summary`"
AM_SUBST_NOTMAKE([GRID_SUMMARY])
AC_SUBST([GRID_SUMMARY])
AC_CONFIG_FILES([grid-config], [chmod +x grid-config])
AC_CONFIG_FILES(Makefile)
AC_CONFIG_FILES(lib/Makefile)
AC_CONFIG_FILES(tests/Makefile)
@@ -400,42 +550,15 @@ AC_CONFIG_FILES(tests/forces/Makefile)
AC_CONFIG_FILES(tests/hadrons/Makefile)
AC_CONFIG_FILES(tests/hmc/Makefile)
AC_CONFIG_FILES(tests/solver/Makefile)
AC_CONFIG_FILES(tests/smearing/Makefile)
AC_CONFIG_FILES(tests/qdpxx/Makefile)
AC_CONFIG_FILES(tests/testu01/Makefile)
AC_CONFIG_FILES(benchmarks/Makefile)
AC_CONFIG_FILES(extras/Makefile)
AC_CONFIG_FILES(extras/Hadrons/Makefile)
AC_OUTPUT
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Summary of configuration for $PACKAGE v$VERSION
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
echo ""
cat grid.configure.summary
echo ""
----- PLATFORM ----------------------------------------
architecture (build) : $build_cpu
os (build) : $build_os
architecture (target) : $target_cpu
os (target) : $target_os
compiler vendor : ${ax_cv_cxx_compiler_vendor}
compiler version : ${ax_cv_gxx_version}
----- BUILD OPTIONS -----------------------------------
SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
Threading : ${ac_openmp}
Communications type : ${comms_type}
Default precision : ${ac_PRECISION}
RNG choice : ${ac_RNG}
GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
LAPACK : ${ac_LAPACK}
FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
HDF5 : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi`
build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
----- BUILD FLAGS -------------------------------------
CXXFLAGS:
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
LDFLAGS:
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
LIBS:
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
-------------------------------------------------------" > config.summary
echo ""
cat config.summary
echo ""
+2 -1
View File
@@ -162,7 +162,8 @@ void Application::saveParameterFile(const std::string parameterFileName)
sizeString((size)*locVol_) << " (" << sizeString(size) << "/site)"
#define DEFINE_MEMPEAK \
auto memPeak = [this](const std::vector<unsigned int> &program)\
GeneticScheduler<unsigned int>::ObjFunc memPeak = \
[this](const std::vector<unsigned int> &program)\
{\
unsigned int memPeak;\
bool msg;\
+53 -3
View File
@@ -41,9 +41,10 @@ using namespace Hadrons;
// constructor /////////////////////////////////////////////////////////////////
Environment::Environment(void)
{
nd_ = GridDefaultLatt().size();
dim_ = GridDefaultLatt();
nd_ = dim_.size();
grid4d_.reset(SpaceTimeGrid::makeFourDimGrid(
GridDefaultLatt(), GridDefaultSimd(nd_, vComplex::Nsimd()),
dim_, GridDefaultSimd(nd_, vComplex::Nsimd()),
GridDefaultMpi()));
gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get()));
auto loc = getGrid()->LocalDimensions();
@@ -132,6 +133,16 @@ unsigned int Environment::getNd(void) const
return nd_;
}
std::vector<int> Environment::getDim(void) const
{
return dim_;
}
int Environment::getDim(const unsigned int mu) const
{
return dim_[mu];
}
// random number generator /////////////////////////////////////////////////////
void Environment::setSeed(const std::vector<int> &seed)
{
@@ -271,6 +282,21 @@ std::string Environment::getModuleType(const std::string name) const
return getModuleType(getModuleAddress(name));
}
std::string Environment::getModuleNamespace(const unsigned int address) const
{
std::string type = getModuleType(address), ns;
auto pos2 = type.rfind("::");
auto pos1 = type.rfind("::", pos2 - 2);
return type.substr(pos1 + 2, pos2 - pos1 - 2);
}
std::string Environment::getModuleNamespace(const std::string name) const
{
return getModuleNamespace(getModuleAddress(name));
}
bool Environment::hasModule(const unsigned int address) const
{
return (address < module_.size());
@@ -492,7 +518,14 @@ std::string Environment::getObjectType(const unsigned int address) const
{
if (hasRegisteredObject(address))
{
return typeName(object_[address].type);
if (object_[address].type)
{
return typeName(object_[address].type);
}
else
{
return "<no type>";
}
}
else if (hasObject(address))
{
@@ -532,6 +565,23 @@ Environment::Size Environment::getObjectSize(const std::string name) const
return getObjectSize(getObjectAddress(name));
}
unsigned int Environment::getObjectModule(const unsigned int address) const
{
if (hasObject(address))
{
return object_[address].module;
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
}
}
unsigned int Environment::getObjectModule(const std::string name) const
{
return getObjectModule(getObjectAddress(name));
}
unsigned int Environment::getObjectLs(const unsigned int address) const
{
if (hasRegisteredObject(address))
+43 -1
View File
@@ -106,6 +106,8 @@ public:
void createGrid(const unsigned int Ls);
GridCartesian * getGrid(const unsigned int Ls = 1) const;
GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const;
std::vector<int> getDim(void) const;
int getDim(const unsigned int mu) const;
unsigned int getNd(void) const;
// random number generator
void setSeed(const std::vector<int> &seed);
@@ -131,6 +133,8 @@ public:
std::string getModuleName(const unsigned int address) const;
std::string getModuleType(const unsigned int address) const;
std::string getModuleType(const std::string name) const;
std::string getModuleNamespace(const unsigned int address) const;
std::string getModuleNamespace(const std::string name) const;
bool hasModule(const unsigned int address) const;
bool hasModule(const std::string name) const;
Graph<unsigned int> makeModuleGraph(void) const;
@@ -171,6 +175,8 @@ public:
std::string getObjectType(const std::string name) const;
Size getObjectSize(const unsigned int address) const;
Size getObjectSize(const std::string name) const;
unsigned int getObjectModule(const unsigned int address) const;
unsigned int getObjectModule(const std::string name) const;
unsigned int getObjectLs(const unsigned int address) const;
unsigned int getObjectLs(const std::string name) const;
bool hasObject(const unsigned int address) const;
@@ -181,6 +187,10 @@ public:
bool hasCreatedObject(const std::string name) const;
bool isObject5d(const unsigned int address) const;
bool isObject5d(const std::string name) const;
template <typename T>
bool isObjectOfType(const unsigned int address) const;
template <typename T>
bool isObjectOfType(const std::string name) const;
Environment::Size getTotalSize(void) const;
void addOwnership(const unsigned int owner,
const unsigned int property);
@@ -197,6 +207,7 @@ private:
bool dryRun_{false};
unsigned int traj_, locVol_;
// grids
std::vector<int> dim_;
GridPt grid4d_;
std::map<unsigned int, GridPt> grid5d_;
GridRbPt gridRb4d_;
@@ -343,7 +354,7 @@ T * Environment::getObject(const unsigned int address) const
else
{
HADRON_ERROR("object with address " + std::to_string(address) +
" does not have type '" + typeid(T).name() +
" does not have type '" + typeName(&typeid(T)) +
"' (has type '" + getObjectType(address) + "')");
}
}
@@ -380,6 +391,37 @@ T * Environment::createLattice(const std::string name)
return createLattice<T>(getObjectAddress(name));
}
template <typename T>
bool Environment::isObjectOfType(const unsigned int address) const
{
if (hasRegisteredObject(address))
{
if (auto h = dynamic_cast<Holder<T> *>(object_[address].data.get()))
{
return true;
}
else
{
return false;
}
}
else if (hasObject(address))
{
HADRON_ERROR("object with address " + std::to_string(address) +
" exists but is not registered");
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
}
}
template <typename T>
bool Environment::isObjectOfType(const std::string name) const
{
return isObjectOfType<T>(getObjectAddress(name));
}
END_HADRONS_NAMESPACE
#endif // Hadrons_Environment_hpp_
+33 -4
View File
@@ -51,23 +51,43 @@ using Grid::operator<<;
* error with GCC 5 (clang & GCC 6 compile fine without it).
*/
// FIXME: find a way to do that in a more general fashion
#ifndef FIMPL
#define FIMPL WilsonImplR
#endif
#ifndef SIMPL
#define SIMPL ScalarImplCR
#endif
BEGIN_HADRONS_NAMESPACE
// type aliases
#define TYPE_ALIASES(FImpl, suffix)\
#define FERM_TYPE_ALIASES(FImpl, suffix)\
typedef FermionOperator<FImpl> FMat##suffix; \
typedef typename FImpl::FermionField FermionField##suffix; \
typedef typename FImpl::PropagatorField PropagatorField##suffix; \
typedef typename FImpl::SitePropagator SitePropagator##suffix; \
typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix;\
typedef std::function<void(FermionField##suffix &, \
typedef std::vector<typename FImpl::SitePropagator::scalar_object> \
SlicedPropagator##suffix;
#define GAUGE_TYPE_ALIASES(FImpl, suffix)\
typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix;
#define SCALAR_TYPE_ALIASES(SImpl, suffix)\
typedef typename SImpl::Field ScalarField##suffix;\
typedef typename SImpl::Field PropagatorField##suffix;
#define SOLVER_TYPE_ALIASES(FImpl, suffix)\
typedef std::function<void(FermionField##suffix &,\
const FermionField##suffix &)> SolverFn##suffix;
#define SINK_TYPE_ALIASES(suffix)\
typedef std::function<SlicedPropagator##suffix(const PropagatorField##suffix &)> SinkFn##suffix;
#define FGS_TYPE_ALIASES(FImpl, suffix)\
FERM_TYPE_ALIASES(FImpl, suffix)\
GAUGE_TYPE_ALIASES(FImpl, suffix)\
SOLVER_TYPE_ALIASES(FImpl, suffix)
// logger
class HadronsLogger: public Logger
{
@@ -145,6 +165,15 @@ std::string typeName(void)
return typeName(typeIdPt<T>());
}
// default writers/readers
#ifdef HAVE_HDF5
typedef Hdf5Reader CorrReader;
typedef Hdf5Writer CorrWriter;
#else
typedef XmlReader CorrReader;
typedef XmlWriter CorrWriter;
#endif
END_HADRONS_NAMESPACE
#endif // Hadrons_Global_hpp_
+14 -29
View File
@@ -1,40 +1,25 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MAction/DWF.hpp>
#include <Grid/Hadrons/Modules/MAction/Wilson.hpp>
#include <Grid/Hadrons/Modules/MContraction/Baryon.hpp>
#include <Grid/Hadrons/Modules/MContraction/DiscLoop.hpp>
#include <Grid/Hadrons/Modules/MContraction/Gamma3pt.hpp>
#include <Grid/Hadrons/Modules/MContraction/Meson.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp>
#include <Grid/Hadrons/Modules/MFermion/GaugeProp.hpp>
#include <Grid/Hadrons/Modules/MGauge/Load.hpp>
#include <Grid/Hadrons/Modules/MGauge/Random.hpp>
#include <Grid/Hadrons/Modules/MGauge/StochEm.hpp>
#include <Grid/Hadrons/Modules/MGauge/Unit.hpp>
#include <Grid/Hadrons/Modules/MLoop/NoiseLoop.hpp>
#include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp>
#include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp>
#include <Grid/Hadrons/Modules/MScalar/Scalar.hpp>
#include <Grid/Hadrons/Modules/MSink/Point.hpp>
#include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp>
#include <Grid/Hadrons/Modules/MSource/Point.hpp>
#include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp>
#include <Grid/Hadrons/Modules/MSource/Wall.hpp>
#include <Grid/Hadrons/Modules/MSource/Z2.hpp>
#include <Grid/Hadrons/Modules/Quark.hpp>
+12 -6
View File
@@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_DWF_hpp_
#define Hadrons_DWF_hpp_
#ifndef Hadrons_MAction_DWF_hpp_
#define Hadrons_MAction_DWF_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
@@ -48,14 +48,15 @@ public:
std::string, gauge,
unsigned int, Ls,
double , mass,
double , M5);
double , M5,
std::string , boundary);
};
template <typename FImpl>
class TDWF: public Module<DWFPar>
{
public:
TYPE_ALIASES(FImpl,);
FGS_TYPE_ALIASES(FImpl,);
public:
// constructor
TDWF(const std::string name);
@@ -116,14 +117,19 @@ void TDWF<FImpl>::execute(void)
<< par().mass << ", M5= " << par().M5 << " and Ls= "
<< par().Ls << " using gauge field '" << par().gauge << "'"
<< std::endl;
LOG(Message) << "Fermion boundary conditions: " << par().boundary
<< std::endl;
env().createGrid(par().Ls);
auto &U = *env().template getObject<LatticeGaugeField>(par().gauge);
auto &g4 = *env().getGrid();
auto &grb4 = *env().getRbGrid();
auto &g5 = *env().getGrid(par().Ls);
auto &grb5 = *env().getRbGrid(par().Ls);
std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
typename DomainWallFermion<FImpl>::ImplParams implParams(boundary);
FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4,
par().mass, par().M5);
par().mass, par().M5,
implParams);
env().setObject(getName(), fMatPt);
}
@@ -131,4 +137,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_DWF_hpp_
#endif // Hadrons_MAction_DWF_hpp_
+11 -5
View File
@@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Wilson_hpp_
#define Hadrons_Wilson_hpp_
#ifndef Hadrons_MAction_Wilson_hpp_
#define Hadrons_MAction_Wilson_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
@@ -46,14 +46,15 @@ class WilsonPar: Serializable
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar,
std::string, gauge,
double , mass);
double , mass,
std::string, boundary);
};
template <typename FImpl>
class TWilson: public Module<WilsonPar>
{
public:
TYPE_ALIASES(FImpl,);
FGS_TYPE_ALIASES(FImpl,);
public:
// constructor
TWilson(const std::string name);
@@ -112,10 +113,15 @@ void TWilson<FImpl>::execute()
{
LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass
<< " using gauge field '" << par().gauge << "'" << std::endl;
LOG(Message) << "Fermion boundary conditions: " << par().boundary
<< std::endl;
auto &U = *env().template getObject<LatticeGaugeField>(par().gauge);
auto &grid = *env().getGrid();
auto &gridRb = *env().getRbGrid();
FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass);
std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
typename WilsonFermion<FImpl>::ImplParams implParams(boundary);
FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass,
implParams);
env().setObject(getName(), fMatPt);
}
@@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Baryon_hpp_
#define Hadrons_Baryon_hpp_
#ifndef Hadrons_MContraction_Baryon_hpp_
#define Hadrons_MContraction_Baryon_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
@@ -55,9 +55,9 @@ template <typename FImpl1, typename FImpl2, typename FImpl3>
class TBaryon: public Module<BaryonPar>
{
public:
TYPE_ALIASES(FImpl1, 1);
TYPE_ALIASES(FImpl2, 2);
TYPE_ALIASES(FImpl3, 3);
FERM_TYPE_ALIASES(FImpl1, 1);
FERM_TYPE_ALIASES(FImpl2, 2);
FERM_TYPE_ALIASES(FImpl3, 3);
class Result: Serializable
{
public:
@@ -112,7 +112,7 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void)
<< " quarks '" << par().q1 << "', '" << par().q2 << "', and '"
<< par().q3 << "'" << std::endl;
XmlWriter writer(par().output);
CorrWriter writer(par().output);
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
PropagatorField3 &q3 = *env().template getObject<PropagatorField3>(par().q2);
@@ -121,11 +121,11 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void)
// FIXME: do contractions
write(writer, "meson", result);
// write(writer, "meson", result);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Baryon_hpp_
#endif // Hadrons_MContraction_Baryon_hpp_
@@ -0,0 +1,144 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/DiscLoop.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MContraction_DiscLoop_hpp_
#define Hadrons_MContraction_DiscLoop_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* DiscLoop *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
class DiscLoopPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(DiscLoopPar,
std::string, q_loop,
Gamma::Algebra, gamma,
std::string, output);
};
template <typename FImpl>
class TDiscLoop: public Module<DiscLoopPar>
{
FERM_TYPE_ALIASES(FImpl,);
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
Gamma::Algebra, gamma,
std::vector<Complex>, corr);
};
public:
// constructor
TDiscLoop(const std::string name);
// destructor
virtual ~TDiscLoop(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(DiscLoop, TDiscLoop<FIMPL>, MContraction);
/******************************************************************************
* TDiscLoop implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TDiscLoop<FImpl>::TDiscLoop(const std::string name)
: Module<DiscLoopPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TDiscLoop<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().q_loop};
return in;
}
template <typename FImpl>
std::vector<std::string> TDiscLoop<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TDiscLoop<FImpl>::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TDiscLoop<FImpl>::execute(void)
{
LOG(Message) << "Computing disconnected loop contraction '" << getName()
<< "' using '" << par().q_loop << "' with " << par().gamma
<< " insertion." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q_loop = *env().template getObject<PropagatorField>(par().q_loop);
LatticeComplex c(env().getGrid());
Gamma gamma(par().gamma);
std::vector<TComplex> buf;
Result result;
c = trace(gamma*q_loop);
sliceSum(c, buf, Tp);
result.gamma = par().gamma;
result.corr.resize(buf.size());
for (unsigned int t = 0; t < buf.size(); ++t)
{
result.corr[t] = TensorRemove(buf[t]);
}
write(writer, "disc", result);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MContraction_DiscLoop_hpp_
@@ -0,0 +1,170 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/Gamma3pt.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MContraction_Gamma3pt_hpp_
#define Hadrons_MContraction_Gamma3pt_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
* 3pt contraction with gamma matrix insertion.
*
* Schematic:
*
* q2 q3
* /----<------*------<----¬
* / gamma \
* / \
* i * * f
* \ /
* \ /
* \----------->----------/
* q1
*
* trace(g5*q1*adj(q2)*g5*gamma*q3)
*/
/******************************************************************************
* Gamma3pt *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
class Gamma3ptPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Gamma3ptPar,
std::string, q1,
std::string, q2,
std::string, q3,
Gamma::Algebra, gamma,
std::string, output);
};
template <typename FImpl1, typename FImpl2, typename FImpl3>
class TGamma3pt: public Module<Gamma3ptPar>
{
FERM_TYPE_ALIASES(FImpl1, 1);
FERM_TYPE_ALIASES(FImpl2, 2);
FERM_TYPE_ALIASES(FImpl3, 3);
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
Gamma::Algebra, gamma,
std::vector<Complex>, corr);
};
public:
// constructor
TGamma3pt(const std::string name);
// destructor
virtual ~TGamma3pt(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(Gamma3pt, ARG(TGamma3pt<FIMPL, FIMPL, FIMPL>), MContraction);
/******************************************************************************
* TGamma3pt implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
TGamma3pt<FImpl1, FImpl2, FImpl3>::TGamma3pt(const std::string name)
: Module<Gamma3ptPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getInput(void)
{
std::vector<std::string> in = {par().q1, par().q2, par().q3};
return in;
}
template <typename FImpl1, typename FImpl2, typename FImpl3>
std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TGamma3pt<FImpl1, FImpl2, FImpl3>::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TGamma3pt<FImpl1, FImpl2, FImpl3>::execute(void)
{
LOG(Message) << "Computing 3pt contractions '" << getName() << "' using"
<< " quarks '" << par().q1 << "', '" << par().q2 << "' and '"
<< par().q3 << "', with " << par().gamma << " insertion."
<< std::endl;
CorrWriter writer(par().output);
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
PropagatorField3 &q3 = *env().template getObject<PropagatorField3>(par().q3);
LatticeComplex c(env().getGrid());
Gamma g5(Gamma::Algebra::Gamma5);
Gamma gamma(par().gamma);
std::vector<TComplex> buf;
Result result;
c = trace(g5*q1*adj(q2)*(g5*gamma)*q3);
sliceSum(c, buf, Tp);
result.gamma = par().gamma;
result.corr.resize(buf.size());
for (unsigned int t = 0; t < buf.size(); ++t)
{
result.corr[t] = TensorRemove(buf[t]);
}
write(writer, "gamma3pt", result);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MContraction_Gamma3pt_hpp_
+132 -25
View File
@@ -6,8 +6,10 @@ Source file: extras/Hadrons/Modules/MContraction/Meson.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2017
Author: Antonin Portelli <antonin.portelli@me.com>
Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -27,8 +29,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Meson_hpp_
#define Hadrons_Meson_hpp_
#ifndef Hadrons_MContraction_Meson_hpp_
#define Hadrons_MContraction_Meson_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
@@ -36,32 +38,56 @@ See the full license in the file "LICENSE" in the top level distribution directo
BEGIN_HADRONS_NAMESPACE
/*
Meson contractions
-----------------------------
* options:
- q1: input propagator 1 (string)
- q2: input propagator 2 (string)
- gammas: gamma products to insert at sink & source, pairs of gamma matrices
(space-separated strings) in angled brackets (i.e. <g_sink g_src>),
in a sequence (e.g. "<Gamma5 Gamma5><Gamma5 GammaT>").
Special values: "all" - perform all possible contractions.
- mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0."),
given as multiples of (2*pi) / L.
*/
/******************************************************************************
* TMeson *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
typedef std::pair<Gamma::Algebra, Gamma::Algebra> GammaPair;
class MesonPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar,
std::string, q1,
std::string, q2,
std::string, output,
Gamma::Algebra, gammaSource,
Gamma::Algebra, gammaSink);
std::string, q1,
std::string, q2,
std::string, gammas,
std::string, sink,
std::string, output);
};
template <typename FImpl1, typename FImpl2>
class TMeson: public Module<MesonPar>
{
public:
TYPE_ALIASES(FImpl1, 1);
TYPE_ALIASES(FImpl2, 2);
FERM_TYPE_ALIASES(FImpl1, 1);
FERM_TYPE_ALIASES(FImpl2, 2);
FERM_TYPE_ALIASES(ScalarImplCR, Scalar);
SINK_TYPE_ALIASES(Scalar);
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result, std::vector<Complex>, corr);
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
Gamma::Algebra, gamma_snk,
Gamma::Algebra, gamma_src,
std::vector<Complex>, corr);
};
public:
// constructor
@@ -71,6 +97,7 @@ public:
// dependencies/products
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
virtual void parseGammaString(std::vector<GammaPair> &gammaList);
// execution
virtual void execute(void);
};
@@ -90,7 +117,7 @@ TMeson<FImpl1, FImpl2>::TMeson(const std::string name)
template <typename FImpl1, typename FImpl2>
std::vector<std::string> TMeson<FImpl1, FImpl2>::getInput(void)
{
std::vector<std::string> input = {par().q1, par().q2};
std::vector<std::string> input = {par().q1, par().q2, par().sink};
return input;
}
@@ -103,7 +130,35 @@ std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void)
return output;
}
template <typename FImpl1, typename FImpl2>
void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList)
{
gammaList.clear();
// Determine gamma matrices to insert at source/sink.
if (par().gammas.compare("all") == 0)
{
// Do all contractions.
for (unsigned int i = 1; i < Gamma::nGamma; i += 2)
{
for (unsigned int j = 1; j < Gamma::nGamma; j += 2)
{
gammaList.push_back(std::make_pair((Gamma::Algebra)i,
(Gamma::Algebra)j));
}
}
}
else
{
// Parse individual contractions from input string.
gammaList = strToVec<GammaPair>(par().gammas);
}
}
// execution ///////////////////////////////////////////////////////////////////
#define mesonConnected(q1, q2, gSnk, gSrc) \
(g5*(gSnk))*(q1)*(adj(gSrc)*g5)*adj(q2)
template <typename FImpl1, typename FImpl2>
void TMeson<FImpl1, FImpl2>::execute(void)
{
@@ -111,21 +166,73 @@ void TMeson<FImpl1, FImpl2>::execute(void)
<< " quarks '" << par().q1 << "' and '" << par().q2 << "'"
<< std::endl;
XmlWriter writer(par().output);
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
LatticeComplex c(env().getGrid());
Gamma gSrc(par().gammaSource), gSnk(par().gammaSink);
Gamma g5(Gamma::Algebra::Gamma5);
std::vector<TComplex> buf;
Result result;
CorrWriter writer(par().output);
std::vector<TComplex> buf;
std::vector<Result> result;
Gamma g5(Gamma::Algebra::Gamma5);
std::vector<GammaPair> gammaList;
int nt = env().getDim(Tp);
c = trace(gSnk*q1*adj(gSrc)*g5*adj(q2)*g5);
sliceSum(c, buf, Tp);
result.corr.resize(buf.size());
for (unsigned int t = 0; t < buf.size(); ++t)
parseGammaString(gammaList);
result.resize(gammaList.size());
for (unsigned int i = 0; i < result.size(); ++i)
{
result.corr[t] = TensorRemove(buf[t]);
result[i].gamma_snk = gammaList[i].first;
result[i].gamma_src = gammaList[i].second;
result[i].corr.resize(nt);
}
if (env().template isObjectOfType<SlicedPropagator1>(par().q1) and
env().template isObjectOfType<SlicedPropagator2>(par().q2))
{
SlicedPropagator1 &q1 = *env().template getObject<SlicedPropagator1>(par().q1);
SlicedPropagator2 &q2 = *env().template getObject<SlicedPropagator2>(par().q2);
LOG(Message) << "(propagator already sinked)" << std::endl;
for (unsigned int i = 0; i < result.size(); ++i)
{
Gamma gSnk(gammaList[i].first);
Gamma gSrc(gammaList[i].second);
for (unsigned int t = 0; t < buf.size(); ++t)
{
result[i].corr[t] = TensorRemove(trace(mesonConnected(q1[t], q2[t], gSnk, gSrc)));
}
}
}
else
{
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
LatticeComplex c(env().getGrid());
LOG(Message) << "(using sink '" << par().sink << "')" << std::endl;
for (unsigned int i = 0; i < result.size(); ++i)
{
Gamma gSnk(gammaList[i].first);
Gamma gSrc(gammaList[i].second);
std::string ns;
ns = env().getModuleNamespace(env().getObjectModule(par().sink));
if (ns == "MSource")
{
PropagatorField1 &sink =
*env().template getObject<PropagatorField1>(par().sink);
c = trace(mesonConnected(q1, q2, gSnk, gSrc)*sink);
sliceSum(c, buf, Tp);
}
else if (ns == "MSink")
{
SinkFnScalar &sink = *env().template getObject<SinkFnScalar>(par().sink);
c = trace(mesonConnected(q1, q2, gSnk, gSrc));
buf = sink(c);
}
for (unsigned int t = 0; t < buf.size(); ++t)
{
result[i].corr[t] = TensorRemove(buf[t]);
}
}
}
write(writer, "meson", result);
}
@@ -134,4 +241,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Meson_hpp_
#endif // Hadrons_MContraction_Meson_hpp_
@@ -0,0 +1,114 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MContraction_WeakHamiltonian_hpp_
#define Hadrons_MContraction_WeakHamiltonian_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* WeakHamiltonian *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
/*******************************************************************************
* Utilities for contractions involving the Weak Hamiltonian.
******************************************************************************/
//// Sum and store correlator.
#define MAKE_DIAG(exp, buf, res, n)\
sliceSum(exp, buf, Tp);\
res.name = (n);\
res.corr.resize(buf.size());\
for (unsigned int t = 0; t < buf.size(); ++t)\
{\
res.corr[t] = TensorRemove(buf[t]);\
}
//// Contraction of mu index: use 'mu' variable in exp.
#define SUM_MU(buf,exp)\
buf = zero;\
for (unsigned int mu = 0; mu < ndim; ++mu)\
{\
buf += exp;\
}
enum
{
i_V = 0,
i_A = 1,
n_i = 2
};
class WeakHamiltonianPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(WeakHamiltonianPar,
std::string, q1,
std::string, q2,
std::string, q3,
std::string, q4,
std::string, output);
};
#define MAKE_WEAK_MODULE(modname)\
class T##modname: public Module<WeakHamiltonianPar>\
{\
public:\
FERM_TYPE_ALIASES(FIMPL,)\
class Result: Serializable\
{\
public:\
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,\
std::string, name,\
std::vector<Complex>, corr);\
};\
public:\
/* constructor */ \
T##modname(const std::string name);\
/* destructor */ \
virtual ~T##modname(void) = default;\
/* dependency relation */ \
virtual std::vector<std::string> getInput(void);\
virtual std::vector<std::string> getOutput(void);\
/* setup */ \
virtual void setup(void);\
/* execution */ \
virtual void execute(void);\
std::vector<std::string> VA_label = {"V", "A"};\
};\
MODULE_REGISTER_NS(modname, T##modname, MContraction);
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MContraction_WeakHamiltonian_hpp_
@@ -0,0 +1,137 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MContraction;
/*
* Weak Hamiltonian current-current contractions, Eye-type.
*
* These contractions are generated by the Q1 and Q2 operators in the physical
* basis (see e.g. Fig 3 of arXiv:1507.03094).
*
* Schematics: q4 |
* /-<-¬ |
* / \ | q2 q3
* \ / | /----<------*------<----¬
* q2 \ / q3 | / /-*-¬ \
* /-----<-----* *-----<----¬ | / / \ \
* i * H_W * f | i * \ / q4 * f
* \ / | \ \->-/ /
* \ / | \ /
* \---------->---------/ | \----------->----------/
* q1 | q1
* |
* Saucer (S) | Eye (E)
*
* S: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1]*q4*gL[mu][p_2])
* E: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1])*trace(q4*gL[mu][p_2])
*/
/******************************************************************************
* TWeakHamiltonianEye implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TWeakHamiltonianEye::TWeakHamiltonianEye(const std::string name)
: Module<WeakHamiltonianPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TWeakHamiltonianEye::getInput(void)
{
std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4};
return in;
}
std::vector<std::string> TWeakHamiltonianEye::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void TWeakHamiltonianEye::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
void TWeakHamiltonianEye::execute(void)
{
LOG(Message) << "Computing Weak Hamiltonian (Eye type) contractions '"
<< getName() << "' using quarks '" << par().q1 << "', '"
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
<< "'." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
LatticeComplex expbuf(env().getGrid());
std::vector<TComplex> corrbuf;
std::vector<Result> result(n_eye_diag);
unsigned int ndim = env().getNd();
PropagatorField tmp1(env().getGrid());
LatticeComplex tmp2(env().getGrid());
std::vector<PropagatorField> S_body(ndim, tmp1);
std::vector<PropagatorField> S_loop(ndim, tmp1);
std::vector<LatticeComplex> E_body(ndim, tmp2);
std::vector<LatticeComplex> E_loop(ndim, tmp2);
// Setup for S-type contractions.
for (int mu = 0; mu < ndim; ++mu)
{
S_body[mu] = MAKE_SE_BODY(q1, q2, q3, GammaL(Gamma::gmu[mu]));
S_loop[mu] = MAKE_SE_LOOP(q4, GammaL(Gamma::gmu[mu]));
}
// Perform S-type contractions.
SUM_MU(expbuf, trace(S_body[mu]*S_loop[mu]))
MAKE_DIAG(expbuf, corrbuf, result[S_diag], "HW_S")
// Recycle sub-expressions for E-type contractions.
for (unsigned int mu = 0; mu < ndim; ++mu)
{
E_body[mu] = trace(S_body[mu]);
E_loop[mu] = trace(S_loop[mu]);
}
// Perform E-type contractions.
SUM_MU(expbuf, E_body[mu]*E_loop[mu])
MAKE_DIAG(expbuf, corrbuf, result[E_diag], "HW_E")
write(writer, "HW_Eye", result);
}
@@ -0,0 +1,58 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MContraction_WeakHamiltonianEye_hpp_
#define Hadrons_MContraction_WeakHamiltonianEye_hpp_
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* WeakHamiltonianEye *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
enum
{
S_diag = 0,
E_diag = 1,
n_eye_diag = 2
};
// Saucer and Eye subdiagram contractions.
#define MAKE_SE_BODY(Q_1, Q_2, Q_3, gamma) (Q_3*g5*Q_1*adj(Q_2)*g5*gamma)
#define MAKE_SE_LOOP(Q_loop, gamma) (Q_loop*gamma)
MAKE_WEAK_MODULE(WeakHamiltonianEye)
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MContraction_WeakHamiltonianEye_hpp_
@@ -0,0 +1,139 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MContraction;
/*
* Weak Hamiltonian current-current contractions, Non-Eye-type.
*
* These contractions are generated by the Q1 and Q2 operators in the physical
* basis (see e.g. Fig 3 of arXiv:1507.03094).
*
* Schematic:
* q2 q3 | q2 q3
* /--<--¬ /--<--¬ | /--<--¬ /--<--¬
* / \ / \ | / \ / \
* / \ / \ | / \ / \
* / \ / \ | / \ / \
* i * * H_W * f | i * * * H_W * f
* \ * | | \ / \ /
* \ / \ / | \ / \ /
* \ / \ / | \ / \ /
* \ / \ / | \-->--/ \-->--/
* \-->--/ \-->--/ | q1 q4
* q1 q4 |
* Connected (C) | Wing (W)
*
* C: trace(q1*adj(q2)*g5*gL[mu]*q3*adj(q4)*g5*gL[mu])
* W: trace(q1*adj(q2)*g5*gL[mu])*trace(q3*adj(q4)*g5*gL[mu])
*
*/
/******************************************************************************
* TWeakHamiltonianNonEye implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TWeakHamiltonianNonEye::TWeakHamiltonianNonEye(const std::string name)
: Module<WeakHamiltonianPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TWeakHamiltonianNonEye::getInput(void)
{
std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4};
return in;
}
std::vector<std::string> TWeakHamiltonianNonEye::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void TWeakHamiltonianNonEye::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
void TWeakHamiltonianNonEye::execute(void)
{
LOG(Message) << "Computing Weak Hamiltonian (Non-Eye type) contractions '"
<< getName() << "' using quarks '" << par().q1 << "', '"
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
<< "'." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
LatticeComplex expbuf(env().getGrid());
std::vector<TComplex> corrbuf;
std::vector<Result> result(n_noneye_diag);
unsigned int ndim = env().getNd();
PropagatorField tmp1(env().getGrid());
LatticeComplex tmp2(env().getGrid());
std::vector<PropagatorField> C_i_side_loop(ndim, tmp1);
std::vector<PropagatorField> C_f_side_loop(ndim, tmp1);
std::vector<LatticeComplex> W_i_side_loop(ndim, tmp2);
std::vector<LatticeComplex> W_f_side_loop(ndim, tmp2);
// Setup for C-type contractions.
for (int mu = 0; mu < ndim; ++mu)
{
C_i_side_loop[mu] = MAKE_CW_SUBDIAG(q1, q2, GammaL(Gamma::gmu[mu]));
C_f_side_loop[mu] = MAKE_CW_SUBDIAG(q3, q4, GammaL(Gamma::gmu[mu]));
}
// Perform C-type contractions.
SUM_MU(expbuf, trace(C_i_side_loop[mu]*C_f_side_loop[mu]))
MAKE_DIAG(expbuf, corrbuf, result[C_diag], "HW_C")
// Recycle sub-expressions for W-type contractions.
for (unsigned int mu = 0; mu < ndim; ++mu)
{
W_i_side_loop[mu] = trace(C_i_side_loop[mu]);
W_f_side_loop[mu] = trace(C_f_side_loop[mu]);
}
// Perform W-type contractions.
SUM_MU(expbuf, W_i_side_loop[mu]*W_f_side_loop[mu])
MAKE_DIAG(expbuf, corrbuf, result[W_diag], "HW_W")
write(writer, "HW_NonEye", result);
}
@@ -0,0 +1,57 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MContraction_WeakHamiltonianNonEye_hpp_
#define Hadrons_MContraction_WeakHamiltonianNonEye_hpp_
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* WeakHamiltonianNonEye *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
enum
{
W_diag = 0,
C_diag = 1,
n_noneye_diag = 2
};
// Wing and Connected subdiagram contractions
#define MAKE_CW_SUBDIAG(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma)
MAKE_WEAK_MODULE(WeakHamiltonianNonEye)
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MContraction_WeakHamiltonianNonEye_hpp_
@@ -0,0 +1,135 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MContraction;
/*
* Weak Hamiltonian + current contractions, disconnected topology for neutral
* mesons.
*
* These contractions are generated by operators Q_1,...,10 of the dS=1 Weak
* Hamiltonian in the physical basis and an additional current J (see e.g.
* Fig 11 of arXiv:1507.03094).
*
* Schematic:
*
* q2 q4 q3
* /--<--¬ /---<--¬ /---<--¬
* / \ / \ / \
* i * * H_W | J * * f
* \ / \ / \ /
* \--->---/ \-------/ \------/
* q1
*
* options
* - q1: input propagator 1 (string)
* - q2: input propagator 2 (string)
* - q3: input propagator 3 (string), assumed to be sequential propagator
* - q4: input propagator 4 (string), assumed to be a loop
*
* type 1: trace(q1*adj(q2)*g5*gL[mu])*trace(loop*gL[mu])*trace(q3*g5)
* type 2: trace(q1*adj(q2)*g5*gL[mu]*loop*gL[mu])*trace(q3*g5)
*/
/*******************************************************************************
* TWeakNeutral4ptDisc implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TWeakNeutral4ptDisc::TWeakNeutral4ptDisc(const std::string name)
: Module<WeakHamiltonianPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TWeakNeutral4ptDisc::getInput(void)
{
std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4};
return in;
}
std::vector<std::string> TWeakNeutral4ptDisc::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void TWeakNeutral4ptDisc::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
void TWeakNeutral4ptDisc::execute(void)
{
LOG(Message) << "Computing Weak Hamiltonian neutral disconnected contractions '"
<< getName() << "' using quarks '" << par().q1 << "', '"
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
<< "'." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
LatticeComplex expbuf(env().getGrid());
std::vector<TComplex> corrbuf;
std::vector<Result> result(n_neut_disc_diag);
unsigned int ndim = env().getNd();
PropagatorField tmp(env().getGrid());
std::vector<PropagatorField> meson(ndim, tmp);
std::vector<PropagatorField> loop(ndim, tmp);
LatticeComplex curr(env().getGrid());
// Setup for type 1 contractions.
for (int mu = 0; mu < ndim; ++mu)
{
meson[mu] = MAKE_DISC_MESON(q1, q2, GammaL(Gamma::gmu[mu]));
loop[mu] = MAKE_DISC_LOOP(q4, GammaL(Gamma::gmu[mu]));
}
curr = MAKE_DISC_CURR(q3, GammaL(Gamma::Algebra::Gamma5));
// Perform type 1 contractions.
SUM_MU(expbuf, trace(meson[mu]*loop[mu]))
expbuf *= curr;
MAKE_DIAG(expbuf, corrbuf, result[neut_disc_1_diag], "HW_disc0_1")
// Perform type 2 contractions.
SUM_MU(expbuf, trace(meson[mu])*trace(loop[mu]))
expbuf *= curr;
MAKE_DIAG(expbuf, corrbuf, result[neut_disc_2_diag], "HW_disc0_2")
write(writer, "HW_disc0", result);
}
@@ -0,0 +1,59 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MContraction_WeakNeutral4ptDisc_hpp_
#define Hadrons_MContraction_WeakNeutral4ptDisc_hpp_
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* WeakNeutral4ptDisc *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
enum
{
neut_disc_1_diag = 0,
neut_disc_2_diag = 1,
n_neut_disc_diag = 2
};
// Neutral 4pt disconnected subdiagram contractions.
#define MAKE_DISC_MESON(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma)
#define MAKE_DISC_LOOP(Q_LOOP, gamma) (Q_LOOP*gamma)
#define MAKE_DISC_CURR(Q_c, gamma) (trace(Q_c*gamma))
MAKE_WEAK_MODULE(WeakNeutral4ptDisc)
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MContraction_WeakNeutral4ptDisc_hpp_
@@ -1,34 +1,5 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/Quark.hpp
Copyright (C) 2015
Copyright (C) 2016
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Quark_hpp_
#define Hadrons_Quark_hpp_
#ifndef Hadrons_MFermion_GaugeProp_hpp_
#define Hadrons_MFermion_GaugeProp_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
@@ -37,27 +8,29 @@ See the full license in the file "LICENSE" in the top level distribution directo
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* TQuark *
* GaugeProp *
******************************************************************************/
class QuarkPar: Serializable
BEGIN_MODULE_NAMESPACE(MFermion)
class GaugePropPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(QuarkPar,
GRID_SERIALIZABLE_CLASS_MEMBERS(GaugePropPar,
std::string, source,
std::string, solver);
};
template <typename FImpl>
class TQuark: public Module<QuarkPar>
class TGaugeProp: public Module<GaugePropPar>
{
public:
TYPE_ALIASES(FImpl,);
FGS_TYPE_ALIASES(FImpl,);
public:
// constructor
TQuark(const std::string name);
TGaugeProp(const std::string name);
// destructor
virtual ~TQuark(void) = default;
// dependencies/products
virtual ~TGaugeProp(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
@@ -69,20 +42,20 @@ private:
SolverFn *solver_{nullptr};
};
MODULE_REGISTER(Quark, TQuark<FIMPL>);
MODULE_REGISTER_NS(GaugeProp, TGaugeProp<FIMPL>, MFermion);
/******************************************************************************
* TQuark implementation *
* TGaugeProp implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TQuark<FImpl>::TQuark(const std::string name)
: Module(name)
TGaugeProp<FImpl>::TGaugeProp(const std::string name)
: Module<GaugePropPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TQuark<FImpl>::getInput(void)
std::vector<std::string> TGaugeProp<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().source, par().solver};
@@ -90,7 +63,7 @@ std::vector<std::string> TQuark<FImpl>::getInput(void)
}
template <typename FImpl>
std::vector<std::string> TQuark<FImpl>::getOutput(void)
std::vector<std::string> TGaugeProp<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName(), getName() + "_5d"};
@@ -99,7 +72,7 @@ std::vector<std::string> TQuark<FImpl>::getOutput(void)
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TQuark<FImpl>::setup(void)
void TGaugeProp<FImpl>::setup(void)
{
Ls_ = env().getObjectLs(par().solver);
env().template registerLattice<PropagatorField>(getName());
@@ -111,13 +84,13 @@ void TQuark<FImpl>::setup(void)
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TQuark<FImpl>::execute(void)
void TGaugeProp<FImpl>::execute(void)
{
LOG(Message) << "Computing quark propagator '" << getName() << "'"
<< std::endl;
<< std::endl;
FermionField source(env().getGrid(Ls_)), sol(env().getGrid(Ls_)),
tmp(env().getGrid());
tmp(env().getGrid());
std::string propName = (Ls_ == 1) ? getName() : (getName() + "_5d");
PropagatorField &prop = *env().template createLattice<PropagatorField>(propName);
PropagatorField &fullSrc = *env().template getObject<PropagatorField>(par().source);
@@ -128,7 +101,7 @@ void TQuark<FImpl>::execute(void)
}
LOG(Message) << "Inverting using solver '" << par().solver
<< "' on source '" << par().source << "'" << std::endl;
<< "' on source '" << par().source << "'" << std::endl;
for (unsigned int s = 0; s < Ns; ++s)
for (unsigned int c = 0; c < Nc; ++c)
{
@@ -170,16 +143,18 @@ void TQuark<FImpl>::execute(void)
if (Ls_ > 1)
{
PropagatorField &p4d =
*env().template getObject<PropagatorField>(getName());
*env().template getObject<PropagatorField>(getName());
axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0);
axpby_ssp_pplus(sol, 0., sol, 1., sol, 0, Ls_-1);
axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1);
ExtractSlice(tmp, sol, 0, 0);
FermToProp(p4d, tmp, s, c);
}
}
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Quark_hpp_
#endif // Hadrons_MFermion_GaugeProp_hpp_
+2 -2
View File
@@ -65,7 +65,7 @@ void TLoad::setup(void)
// execution ///////////////////////////////////////////////////////////////////
void TLoad::execute(void)
{
NerscField header;
FieldMetaData header;
std::string fileName = par().file + "."
+ std::to_string(env().getTrajectory());
@@ -74,5 +74,5 @@ void TLoad::execute(void)
LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName());
NerscIO::readConfiguration(U, header, fileName);
LOG(Message) << "NERSC header:" << std::endl;
dump_nersc_header(header, LOG(Message));
dump_meta_data(header, LOG(Message));
}
+3 -3
View File
@@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Load_hpp_
#define Hadrons_Load_hpp_
#ifndef Hadrons_MGauge_Load_hpp_
#define Hadrons_MGauge_Load_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
@@ -70,4 +70,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Load_hpp_
#endif // Hadrons_MGauge_Load_hpp_
+3 -3
View File
@@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Random_hpp_
#define Hadrons_Random_hpp_
#ifndef Hadrons_MGauge_Random_hpp_
#define Hadrons_MGauge_Random_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
@@ -63,4 +63,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Random_hpp_
#endif // Hadrons_MGauge_Random_hpp_
+88
View File
@@ -0,0 +1,88 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MGauge/StochEm.cc
Copyright (C) 2015
Copyright (C) 2016
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MGauge/StochEm.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MGauge;
/******************************************************************************
* TStochEm implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TStochEm::TStochEm(const std::string name)
: Module<StochEmPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TStochEm::getInput(void)
{
std::vector<std::string> in;
return in;
}
std::vector<std::string> TStochEm::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void TStochEm::setup(void)
{
if (!env().hasRegisteredObject("_" + getName() + "_weight"))
{
env().registerLattice<EmComp>("_" + getName() + "_weight");
}
env().registerLattice<EmField>(getName());
}
// execution ///////////////////////////////////////////////////////////////////
void TStochEm::execute(void)
{
PhotonR photon(par().gauge, par().zmScheme);
EmField &a = *env().createLattice<EmField>(getName());
EmComp *w;
if (!env().hasCreatedObject("_" + getName() + "_weight"))
{
LOG(Message) << "Caching stochatic EM potential weight (gauge: "
<< par().gauge << ", zero-mode scheme: "
<< par().zmScheme << ")..." << std::endl;
w = env().createLattice<EmComp>("_" + getName() + "_weight");
photon.StochasticWeight(*w);
}
else
{
w = env().getObject<EmComp>("_" + getName() + "_weight");
}
LOG(Message) << "Generating stochatic EM potential..." << std::endl;
photon.StochasticField(a, *env().get4dRng(), *w);
}
+75
View File
@@ -0,0 +1,75 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MGauge/StochEm.hpp
Copyright (C) 2015
Copyright (C) 2016
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MGauge_StochEm_hpp_
#define Hadrons_MGauge_StochEm_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* StochEm *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MGauge)
class StochEmPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(StochEmPar,
PhotonR::Gauge, gauge,
PhotonR::ZmScheme, zmScheme);
};
class TStochEm: public Module<StochEmPar>
{
public:
typedef PhotonR::GaugeField EmField;
typedef PhotonR::GaugeLinkField EmComp;
public:
// constructor
TStochEm(const std::string name);
// destructor
virtual ~TStochEm(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(StochEm, TStochEm, MGauge);
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MGauge_StochEm_hpp_
+3 -3
View File
@@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Unit_hpp_
#define Hadrons_Unit_hpp_
#ifndef Hadrons_MGauge_Unit_hpp_
#define Hadrons_MGauge_Unit_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
@@ -63,4 +63,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Unit_hpp_
#endif // Hadrons_MGauge_Unit_hpp_
+132
View File
@@ -0,0 +1,132 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MLoop/NoiseLoop.hpp
Copyright (C) 2016
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MLoop_NoiseLoop_hpp_
#define Hadrons_MLoop_NoiseLoop_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
Noise loop propagator
-----------------------------
* loop_x = q_x * adj(eta_x)
* options:
- q = Result of inversion on noise source.
- eta = noise source.
*/
/******************************************************************************
* NoiseLoop *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MLoop)
class NoiseLoopPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(NoiseLoopPar,
std::string, q,
std::string, eta);
};
template <typename FImpl>
class TNoiseLoop: public Module<NoiseLoopPar>
{
public:
FERM_TYPE_ALIASES(FImpl,);
public:
// constructor
TNoiseLoop(const std::string name);
// destructor
virtual ~TNoiseLoop(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(NoiseLoop, TNoiseLoop<FIMPL>, MLoop);
/******************************************************************************
* TNoiseLoop implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TNoiseLoop<FImpl>::TNoiseLoop(const std::string name)
: Module<NoiseLoopPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TNoiseLoop<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().q, par().eta};
return in;
}
template <typename FImpl>
std::vector<std::string> TNoiseLoop<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TNoiseLoop<FImpl>::setup(void)
{
env().template registerLattice<PropagatorField>(getName());
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TNoiseLoop<FImpl>::execute(void)
{
PropagatorField &loop = *env().template createLattice<PropagatorField>(getName());
PropagatorField &q = *env().template getObject<PropagatorField>(par().q);
PropagatorField &eta = *env().template getObject<PropagatorField>(par().eta);
loop = q*adj(eta);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MLoop_NoiseLoop_hpp_
@@ -0,0 +1,226 @@
#include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp>
#include <Grid/Hadrons/Modules/MScalar/Scalar.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MScalar;
/******************************************************************************
* TChargedProp implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TChargedProp::TChargedProp(const std::string name)
: Module<ChargedPropPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TChargedProp::getInput(void)
{
std::vector<std::string> in = {par().source, par().emField};
return in;
}
std::vector<std::string> TChargedProp::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void TChargedProp::setup(void)
{
freeMomPropName_ = FREEMOMPROP(par().mass);
phaseName_.clear();
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
{
phaseName_.push_back("_shiftphase_" + std::to_string(mu));
}
GFSrcName_ = "_" + getName() + "_DinvSrc";
if (!env().hasRegisteredObject(freeMomPropName_))
{
env().registerLattice<ScalarField>(freeMomPropName_);
}
if (!env().hasRegisteredObject(phaseName_[0]))
{
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
{
env().registerLattice<ScalarField>(phaseName_[mu]);
}
}
if (!env().hasRegisteredObject(GFSrcName_))
{
env().registerLattice<ScalarField>(GFSrcName_);
}
env().registerLattice<ScalarField>(getName());
}
// execution ///////////////////////////////////////////////////////////////////
void TChargedProp::execute(void)
{
// CACHING ANALYTIC EXPRESSIONS
ScalarField &source = *env().getObject<ScalarField>(par().source);
Complex ci(0.0,1.0);
FFT fft(env().getGrid());
// cache free scalar propagator
if (!env().hasCreatedObject(freeMomPropName_))
{
LOG(Message) << "Caching momentum space free scalar propagator"
<< " (mass= " << par().mass << ")..." << std::endl;
freeMomProp_ = env().createLattice<ScalarField>(freeMomPropName_);
SIMPL::MomentumSpacePropagator(*freeMomProp_, par().mass);
}
else
{
freeMomProp_ = env().getObject<ScalarField>(freeMomPropName_);
}
// cache G*F*src
if (!env().hasCreatedObject(GFSrcName_))
{
GFSrc_ = env().createLattice<ScalarField>(GFSrcName_);
fft.FFT_all_dim(*GFSrc_, source, FFT::forward);
*GFSrc_ = (*freeMomProp_)*(*GFSrc_);
}
else
{
GFSrc_ = env().getObject<ScalarField>(GFSrcName_);
}
// cache phases
if (!env().hasCreatedObject(phaseName_[0]))
{
std::vector<int> &l = env().getGrid()->_fdimensions;
LOG(Message) << "Caching shift phases..." << std::endl;
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
{
Real twoPiL = M_PI*2./l[mu];
phase_.push_back(env().createLattice<ScalarField>(phaseName_[mu]));
LatticeCoordinate(*(phase_[mu]), mu);
*(phase_[mu]) = exp(ci*twoPiL*(*(phase_[mu])));
}
}
else
{
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
{
phase_.push_back(env().getObject<ScalarField>(phaseName_[mu]));
}
}
// PROPAGATOR CALCULATION
LOG(Message) << "Computing charged scalar propagator"
<< " (mass= " << par().mass
<< ", charge= " << par().charge << ")..." << std::endl;
ScalarField &prop = *env().createLattice<ScalarField>(getName());
ScalarField buf(env().getGrid());
ScalarField &GFSrc = *GFSrc_, &G = *freeMomProp_;
double q = par().charge;
// G*F*Src
prop = GFSrc;
// - q*G*momD1*G*F*Src (momD1 = F*D1*Finv)
buf = GFSrc;
momD1(buf, fft);
buf = G*buf;
prop = prop - q*buf;
// + q^2*G*momD1*G*momD1*G*F*Src (here buf = G*momD1*G*F*Src)
momD1(buf, fft);
prop = prop + q*q*G*buf;
// - q^2*G*momD2*G*F*Src (momD2 = F*D2*Finv)
buf = GFSrc;
momD2(buf, fft);
prop = prop - q*q*G*buf;
// final FT
fft.FFT_all_dim(prop, prop, FFT::backward);
// OUTPUT IF NECESSARY
if (!par().output.empty())
{
std::string filename = par().output + "." +
std::to_string(env().getTrajectory());
LOG(Message) << "Saving zero-momentum projection to '"
<< filename << "'..." << std::endl;
CorrWriter writer(filename);
std::vector<TComplex> vecBuf;
std::vector<Complex> result;
sliceSum(prop, vecBuf, Tp);
result.resize(vecBuf.size());
for (unsigned int t = 0; t < vecBuf.size(); ++t)
{
result[t] = TensorRemove(vecBuf[t]);
}
write(writer, "charge", q);
write(writer, "prop", result);
}
}
void TChargedProp::momD1(ScalarField &s, FFT &fft)
{
EmField &A = *env().getObject<EmField>(par().emField);
ScalarField buf(env().getGrid()), result(env().getGrid()),
Amu(env().getGrid());
Complex ci(0.0,1.0);
result = zero;
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
{
Amu = peekLorentz(A, mu);
buf = (*phase_[mu])*s;
fft.FFT_all_dim(buf, buf, FFT::backward);
buf = Amu*buf;
fft.FFT_all_dim(buf, buf, FFT::forward);
result = result - ci*buf;
}
fft.FFT_all_dim(s, s, FFT::backward);
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
{
Amu = peekLorentz(A, mu);
buf = Amu*s;
fft.FFT_all_dim(buf, buf, FFT::forward);
result = result + ci*adj(*phase_[mu])*buf;
}
s = result;
}
void TChargedProp::momD2(ScalarField &s, FFT &fft)
{
EmField &A = *env().getObject<EmField>(par().emField);
ScalarField buf(env().getGrid()), result(env().getGrid()),
Amu(env().getGrid());
result = zero;
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
{
Amu = peekLorentz(A, mu);
buf = (*phase_[mu])*s;
fft.FFT_all_dim(buf, buf, FFT::backward);
buf = Amu*Amu*buf;
fft.FFT_all_dim(buf, buf, FFT::forward);
result = result + .5*buf;
}
fft.FFT_all_dim(s, s, FFT::backward);
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
{
Amu = peekLorentz(A, mu);
buf = Amu*Amu*s;
fft.FFT_all_dim(buf, buf, FFT::forward);
result = result + .5*adj(*phase_[mu])*buf;
}
s = result;
}
@@ -0,0 +1,61 @@
#ifndef Hadrons_MScalar_ChargedProp_hpp_
#define Hadrons_MScalar_ChargedProp_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Charged scalar propagator *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MScalar)
class ChargedPropPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(ChargedPropPar,
std::string, emField,
std::string, source,
double, mass,
double, charge,
std::string, output);
};
class TChargedProp: public Module<ChargedPropPar>
{
public:
SCALAR_TYPE_ALIASES(SIMPL,);
typedef PhotonR::GaugeField EmField;
typedef PhotonR::GaugeLinkField EmComp;
public:
// constructor
TChargedProp(const std::string name);
// destructor
virtual ~TChargedProp(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
private:
void momD1(ScalarField &s, FFT &fft);
void momD2(ScalarField &s, FFT &fft);
private:
std::string freeMomPropName_, GFSrcName_;
std::vector<std::string> phaseName_;
ScalarField *freeMomProp_, *GFSrc_;
std::vector<ScalarField *> phase_;
EmField *A;
};
MODULE_REGISTER_NS(ChargedProp, TChargedProp, MScalar);
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MScalar_ChargedProp_hpp_
@@ -0,0 +1,79 @@
#include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp>
#include <Grid/Hadrons/Modules/MScalar/Scalar.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MScalar;
/******************************************************************************
* TFreeProp implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TFreeProp::TFreeProp(const std::string name)
: Module<FreePropPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TFreeProp::getInput(void)
{
std::vector<std::string> in = {par().source};
return in;
}
std::vector<std::string> TFreeProp::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void TFreeProp::setup(void)
{
freeMomPropName_ = FREEMOMPROP(par().mass);
if (!env().hasRegisteredObject(freeMomPropName_))
{
env().registerLattice<ScalarField>(freeMomPropName_);
}
env().registerLattice<ScalarField>(getName());
}
// execution ///////////////////////////////////////////////////////////////////
void TFreeProp::execute(void)
{
ScalarField &prop = *env().createLattice<ScalarField>(getName());
ScalarField &source = *env().getObject<ScalarField>(par().source);
ScalarField *freeMomProp;
if (!env().hasCreatedObject(freeMomPropName_))
{
LOG(Message) << "Caching momentum space free scalar propagator"
<< " (mass= " << par().mass << ")..." << std::endl;
freeMomProp = env().createLattice<ScalarField>(freeMomPropName_);
SIMPL::MomentumSpacePropagator(*freeMomProp, par().mass);
}
else
{
freeMomProp = env().getObject<ScalarField>(freeMomPropName_);
}
LOG(Message) << "Computing free scalar propagator..." << std::endl;
SIMPL::FreePropagator(source, prop, *freeMomProp);
if (!par().output.empty())
{
TextWriter writer(par().output + "." +
std::to_string(env().getTrajectory()));
std::vector<TComplex> buf;
std::vector<Complex> result;
sliceSum(prop, buf, Tp);
result.resize(buf.size());
for (unsigned int t = 0; t < buf.size(); ++t)
{
result[t] = TensorRemove(buf[t]);
}
write(writer, "prop", result);
}
}
@@ -0,0 +1,50 @@
#ifndef Hadrons_MScalar_FreeProp_hpp_
#define Hadrons_MScalar_FreeProp_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* FreeProp *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MScalar)
class FreePropPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(FreePropPar,
std::string, source,
double, mass,
std::string, output);
};
class TFreeProp: public Module<FreePropPar>
{
public:
SCALAR_TYPE_ALIASES(SIMPL,);
public:
// constructor
TFreeProp(const std::string name);
// destructor
virtual ~TFreeProp(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
private:
std::string freeMomPropName_;
};
MODULE_REGISTER_NS(FreeProp, TFreeProp, MScalar);
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MScalar_FreeProp_hpp_
@@ -0,0 +1,6 @@
#ifndef Hadrons_Scalar_hpp_
#define Hadrons_Scalar_hpp_
#define FREEMOMPROP(m) "_scalar_mom_prop_" + std::to_string(m)
#endif // Hadrons_Scalar_hpp_
+114
View File
@@ -0,0 +1,114 @@
#ifndef Hadrons_MSink_Point_hpp_
#define Hadrons_MSink_Point_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Point *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MSink)
class PointPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(PointPar,
std::string, mom);
};
template <typename FImpl>
class TPoint: public Module<PointPar>
{
public:
FERM_TYPE_ALIASES(FImpl,);
SINK_TYPE_ALIASES();
public:
// constructor
TPoint(const std::string name);
// destructor
virtual ~TPoint(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSink);
MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSink);
/******************************************************************************
* TPoint implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TPoint<FImpl>::TPoint(const std::string name)
: Module<PointPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TPoint<FImpl>::getInput(void)
{
std::vector<std::string> in;
return in;
}
template <typename FImpl>
std::vector<std::string> TPoint<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TPoint<FImpl>::setup(void)
{
unsigned int size;
size = env().template lattice4dSize<LatticeComplex>();
env().registerObject(getName(), size);
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TPoint<FImpl>::execute(void)
{
std::vector<Real> p = strToVec<Real>(par().mom);
LatticeComplex ph(env().getGrid()), coor(env().getGrid());
Complex i(0.0,1.0);
LOG(Message) << "Setting up point sink function for momentum ["
<< par().mom << "]" << std::endl;
ph = zero;
for(unsigned int mu = 0; mu < env().getNd(); mu++)
{
LatticeCoordinate(coor, mu);
ph = ph + (p[mu]/env().getGrid()->_fdimensions[mu])*coor;
}
ph = exp((Real)(2*M_PI)*i*ph);
auto sink = [ph](const PropagatorField &field)
{
SlicedPropagator res;
PropagatorField tmp = ph*field;
sliceSum(tmp, res, Tp);
return res;
};
env().setObject(getName(), new SinkFn(sink));
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MSink_Point_hpp_
+4 -4
View File
@@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_RBPrecCG_hpp_
#define Hadrons_RBPrecCG_hpp_
#ifndef Hadrons_MSolver_RBPrecCG_hpp_
#define Hadrons_MSolver_RBPrecCG_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
@@ -53,7 +53,7 @@ template <typename FImpl>
class TRBPrecCG: public Module<RBPrecCGPar>
{
public:
TYPE_ALIASES(FImpl,);
FGS_TYPE_ALIASES(FImpl,);
public:
// constructor
TRBPrecCG(const std::string name);
@@ -129,4 +129,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_RBPrecCG_hpp_
#endif // Hadrons_MSolver_RBPrecCG_hpp_
+6 -5
View File
@@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Point_hpp_
#define Hadrons_Point_hpp_
#ifndef Hadrons_MSource_Point_hpp_
#define Hadrons_MSource_Point_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
@@ -63,7 +63,7 @@ template <typename FImpl>
class TPoint: public Module<PointPar>
{
public:
TYPE_ALIASES(FImpl,);
FERM_TYPE_ALIASES(FImpl,);
public:
// constructor
TPoint(const std::string name);
@@ -78,7 +78,8 @@ public:
virtual void execute(void);
};
MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSource);
MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSource);
MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSource);
/******************************************************************************
* TPoint template implementation *
@@ -132,4 +133,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Point_hpp_
#endif // Hadrons_MSource_Point_hpp_
+7 -6
View File
@@ -6,6 +6,7 @@ Source file: extras/Hadrons/Modules/MSource/SeqGamma.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2017
Author: Antonin Portelli <antonin.portelli@me.com>
@@ -27,8 +28,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_SeqGamma_hpp_
#define Hadrons_SeqGamma_hpp_
#ifndef Hadrons_MSource_SeqGamma_hpp_
#define Hadrons_MSource_SeqGamma_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
@@ -71,7 +72,7 @@ template <typename FImpl>
class TSeqGamma: public Module<SeqGammaPar>
{
public:
TYPE_ALIASES(FImpl,);
FGS_TYPE_ALIASES(FImpl,);
public:
// constructor
TSeqGamma(const std::string name);
@@ -149,9 +150,9 @@ void TSeqGamma<FImpl>::execute(void)
for(unsigned int mu = 0; mu < env().getNd(); mu++)
{
LatticeCoordinate(coor, mu);
ph = ph + p[mu]*coor;
ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu])));
}
ph = exp(i*ph);
ph = exp((Real)(2*M_PI)*i*ph);
LatticeCoordinate(t, Tp);
src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q);
}
@@ -160,4 +161,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_SeqGamma_hpp_
#endif // Hadrons_MSource_SeqGamma_hpp_
+147
View File
@@ -0,0 +1,147 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MSource/Wall.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MSource_WallSource_hpp_
#define Hadrons_MSource_WallSource_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
Wall source
-----------------------------
* src_x = delta(x_3 - tW) * exp(i x.mom)
* options:
- tW: source timeslice (integer)
- mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0.")
*/
/******************************************************************************
* Wall *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MSource)
class WallPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(WallPar,
unsigned int, tW,
std::string, mom);
};
template <typename FImpl>
class TWall: public Module<WallPar>
{
public:
FERM_TYPE_ALIASES(FImpl,);
public:
// constructor
TWall(const std::string name);
// destructor
virtual ~TWall(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(Wall, TWall<FIMPL>, MSource);
/******************************************************************************
* TWall implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TWall<FImpl>::TWall(const std::string name)
: Module<WallPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TWall<FImpl>::getInput(void)
{
std::vector<std::string> in;
return in;
}
template <typename FImpl>
std::vector<std::string> TWall<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TWall<FImpl>::setup(void)
{
env().template registerLattice<PropagatorField>(getName());
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TWall<FImpl>::execute(void)
{
LOG(Message) << "Generating wall source at t = " << par().tW
<< " with momentum " << par().mom << std::endl;
PropagatorField &src = *env().template createLattice<PropagatorField>(getName());
Lattice<iScalar<vInteger>> t(env().getGrid());
LatticeComplex ph(env().getGrid()), coor(env().getGrid());
std::vector<Real> p;
Complex i(0.0,1.0);
p = strToVec<Real>(par().mom);
ph = zero;
for(unsigned int mu = 0; mu < Nd; mu++)
{
LatticeCoordinate(coor, mu);
ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu])));
}
ph = exp((Real)(2*M_PI)*i*ph);
LatticeCoordinate(t, Tp);
src = 1.;
src = where((t == par().tW), src*ph, 0.*src);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MSource_WallSource_hpp_
+6 -5
View File
@@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Z2_hpp_
#define Hadrons_Z2_hpp_
#ifndef Hadrons_MSource_Z2_hpp_
#define Hadrons_MSource_Z2_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
@@ -67,7 +67,7 @@ template <typename FImpl>
class TZ2: public Module<Z2Par>
{
public:
TYPE_ALIASES(FImpl,);
FERM_TYPE_ALIASES(FImpl,);
public:
// constructor
TZ2(const std::string name);
@@ -82,7 +82,8 @@ public:
virtual void execute(void);
};
MODULE_REGISTER_NS(Z2, TZ2<FIMPL>, MSource);
MODULE_REGISTER_NS(Z2, TZ2<FIMPL>, MSource);
MODULE_REGISTER_NS(ScalarZ2, TZ2<ScalarImplCR>, MSource);
/******************************************************************************
* TZ2 template implementation *
@@ -148,4 +149,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Z2_hpp_
#endif // Hadrons_MSource_Z2_hpp_
@@ -1,5 +1,5 @@
#ifndef Hadrons____FILEBASENAME____hpp_
#define Hadrons____FILEBASENAME____hpp_
#ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_
#define Hadrons____NAMESPACE_______FILEBASENAME____hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
@@ -41,4 +41,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons____FILEBASENAME____hpp_
#endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_
@@ -1,5 +1,5 @@
#ifndef Hadrons____FILEBASENAME____hpp_
#define Hadrons____FILEBASENAME____hpp_
#ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_
#define Hadrons____NAMESPACE_______FILEBASENAME____hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
@@ -82,4 +82,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons____FILEBASENAME____hpp_
#endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_
+22 -3
View File
@@ -1,19 +1,38 @@
modules_cc =\
Modules/MContraction/WeakHamiltonianEye.cc \
Modules/MContraction/WeakHamiltonianNonEye.cc \
Modules/MContraction/WeakNeutral4ptDisc.cc \
Modules/MGauge/Load.cc \
Modules/MGauge/Random.cc \
Modules/MGauge/Unit.cc
Modules/MGauge/StochEm.cc \
Modules/MGauge/Unit.cc \
Modules/MScalar/ChargedProp.cc \
Modules/MScalar/FreeProp.cc
modules_hpp =\
Modules/MAction/DWF.hpp \
Modules/MAction/Wilson.hpp \
Modules/MContraction/Baryon.hpp \
Modules/MContraction/DiscLoop.hpp \
Modules/MContraction/Gamma3pt.hpp \
Modules/MContraction/Meson.hpp \
Modules/MContraction/WeakHamiltonian.hpp \
Modules/MContraction/WeakHamiltonianEye.hpp \
Modules/MContraction/WeakHamiltonianNonEye.hpp \
Modules/MContraction/WeakNeutral4ptDisc.hpp \
Modules/MFermion/GaugeProp.hpp \
Modules/MGauge/Load.hpp \
Modules/MGauge/Random.hpp \
Modules/MGauge/StochEm.hpp \
Modules/MGauge/Unit.hpp \
Modules/MLoop/NoiseLoop.hpp \
Modules/MScalar/ChargedProp.hpp \
Modules/MScalar/FreeProp.hpp \
Modules/MScalar/Scalar.hpp \
Modules/MSink/Point.hpp \
Modules/MSolver/RBPrecCG.hpp \
Modules/MSource/Point.hpp \
Modules/MSource/SeqGamma.hpp \
Modules/MSource/Z2.hpp \
Modules/Quark.hpp
Modules/MSource/Wall.hpp \
Modules/MSource/Z2.hpp
+11
View File
@@ -0,0 +1,11 @@
#include <qed-fvol/Global.hpp>
using namespace Grid;
using namespace QCD;
using namespace QedFVol;
QedFVolLogger QedFVol::QedFVolLogError(1,"Error");
QedFVolLogger QedFVol::QedFVolLogWarning(1,"Warning");
QedFVolLogger QedFVol::QedFVolLogMessage(1,"Message");
QedFVolLogger QedFVol::QedFVolLogIterative(1,"Iterative");
QedFVolLogger QedFVol::QedFVolLogDebug(1,"Debug");
+42
View File
@@ -0,0 +1,42 @@
#ifndef QedFVol_Global_hpp_
#define QedFVol_Global_hpp_
#include <Grid/Grid.h>
#define BEGIN_QEDFVOL_NAMESPACE \
namespace Grid {\
using namespace QCD;\
namespace QedFVol {\
using Grid::operator<<;
#define END_QEDFVOL_NAMESPACE }}
/* the 'using Grid::operator<<;' statement prevents a very nasty compilation
* error with GCC (clang compiles fine without it).
*/
BEGIN_QEDFVOL_NAMESPACE
class QedFVolLogger: public Logger
{
public:
QedFVolLogger(int on, std::string nm): Logger("QedFVol", on, nm,
GridLogColours, "BLACK"){};
};
#define LOG(channel) std::cout << QedFVolLog##channel
#define QEDFVOL_ERROR(msg)\
LOG(Error) << msg << " (" << __FUNCTION__ << " at " << __FILE__ << ":"\
<< __LINE__ << ")" << std::endl;\
abort();
#define DEBUG_VAR(var) LOG(Debug) << #var << "= " << (var) << std::endl;
extern QedFVolLogger QedFVolLogError;
extern QedFVolLogger QedFVolLogWarning;
extern QedFVolLogger QedFVolLogMessage;
extern QedFVolLogger QedFVolLogIterative;
extern QedFVolLogger QedFVolLogDebug;
END_QEDFVOL_NAMESPACE
#endif // QedFVol_Global_hpp_
+9
View File
@@ -0,0 +1,9 @@
AM_CXXFLAGS += -I$(top_srcdir)/extras
bin_PROGRAMS = qed-fvol
qed_fvol_SOURCES = \
qed-fvol.cc \
Global.cc
qed_fvol_LDADD = -lGrid
+265
View File
@@ -0,0 +1,265 @@
#ifndef QEDFVOL_WILSONLOOPS_H
#define QEDFVOL_WILSONLOOPS_H
#include <Global.hpp>
BEGIN_QEDFVOL_NAMESPACE
template <class Gimpl> class NewWilsonLoops : public Gimpl {
public:
INHERIT_GIMPL_TYPES(Gimpl);
typedef typename Gimpl::GaugeLinkField GaugeMat;
typedef typename Gimpl::GaugeField GaugeLorentz;
//////////////////////////////////////////////////
// directed plaquette oriented in mu,nu plane
//////////////////////////////////////////////////
static void dirPlaquette(GaugeMat &plaq, const std::vector<GaugeMat> &U,
const int mu, const int nu) {
// Annoyingly, must use either scope resolution to find dependent base
// class,
// or this-> ; there is no "this" in a static method. This forces explicit
// Gimpl scope
// resolution throughout the usage in this file, and rather defeats the
// purpose of deriving
// from Gimpl.
plaq = Gimpl::CovShiftBackward(
U[mu], mu, Gimpl::CovShiftBackward(
U[nu], nu, Gimpl::CovShiftForward(U[mu], mu, U[nu])));
}
//////////////////////////////////////////////////
// trace of directed plaquette oriented in mu,nu plane
//////////////////////////////////////////////////
static void traceDirPlaquette(LatticeComplex &plaq,
const std::vector<GaugeMat> &U, const int mu,
const int nu) {
GaugeMat sp(U[0]._grid);
dirPlaquette(sp, U, mu, nu);
plaq = trace(sp);
}
//////////////////////////////////////////////////
// sum over all planes of plaquette
//////////////////////////////////////////////////
static void sitePlaquette(LatticeComplex &Plaq,
const std::vector<GaugeMat> &U) {
LatticeComplex sitePlaq(U[0]._grid);
Plaq = zero;
for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) {
for (int nu = 0; nu < mu; nu++) {
traceDirPlaquette(sitePlaq, U, mu, nu);
Plaq = Plaq + sitePlaq;
}
}
}
//////////////////////////////////////////////////
// sum over all x,y,z,t and over all planes of plaquette
//////////////////////////////////////////////////
static Real sumPlaquette(const GaugeLorentz &Umu) {
std::vector<GaugeMat> U(4, Umu._grid);
for (int mu = 0; mu < Umu._grid->_ndimension; mu++) {
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
}
LatticeComplex Plaq(Umu._grid);
sitePlaquette(Plaq, U);
TComplex Tp = sum(Plaq);
Complex p = TensorRemove(Tp);
return p.real();
}
//////////////////////////////////////////////////
// average over all x,y,z,t and over all planes of plaquette
//////////////////////////////////////////////////
static Real avgPlaquette(const GaugeLorentz &Umu) {
int ndim = Umu._grid->_ndimension;
Real sumplaq = sumPlaquette(Umu);
Real vol = Umu._grid->gSites();
Real faces = (1.0 * ndim * (ndim - 1)) / 2.0;
return sumplaq / vol / faces / Nc; // Nc dependent... FIXME
}
//////////////////////////////////////////////////
// Wilson loop of size (R1, R2), oriented in mu,nu plane
//////////////////////////////////////////////////
static void wilsonLoop(GaugeMat &wl, const std::vector<GaugeMat> &U,
const int Rmu, const int Rnu,
const int mu, const int nu) {
wl = U[nu];
for(int i = 0; i < Rnu-1; i++){
wl = Gimpl::CovShiftForward(U[nu], nu, wl);
}
for(int i = 0; i < Rmu; i++){
wl = Gimpl::CovShiftForward(U[mu], mu, wl);
}
for(int i = 0; i < Rnu; i++){
wl = Gimpl::CovShiftBackward(U[nu], nu, wl);
}
for(int i = 0; i < Rmu; i++){
wl = Gimpl::CovShiftBackward(U[mu], mu, wl);
}
}
//////////////////////////////////////////////////
// trace of Wilson Loop oriented in mu,nu plane
//////////////////////////////////////////////////
static void traceWilsonLoop(LatticeComplex &wl,
const std::vector<GaugeMat> &U,
const int Rmu, const int Rnu,
const int mu, const int nu) {
GaugeMat sp(U[0]._grid);
wilsonLoop(sp, U, Rmu, Rnu, mu, nu);
wl = trace(sp);
}
//////////////////////////////////////////////////
// sum over all planes of Wilson loop
//////////////////////////////////////////////////
static void siteWilsonLoop(LatticeComplex &Wl,
const std::vector<GaugeMat> &U,
const int R1, const int R2) {
LatticeComplex siteWl(U[0]._grid);
Wl = zero;
for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) {
for (int nu = 0; nu < mu; nu++) {
traceWilsonLoop(siteWl, U, R1, R2, mu, nu);
Wl = Wl + siteWl;
traceWilsonLoop(siteWl, U, R2, R1, mu, nu);
Wl = Wl + siteWl;
}
}
}
//////////////////////////////////////////////////
// sum over planes of Wilson loop with length R1
// in the time direction
//////////////////////////////////////////////////
static void siteTimelikeWilsonLoop(LatticeComplex &Wl,
const std::vector<GaugeMat> &U,
const int R1, const int R2) {
LatticeComplex siteWl(U[0]._grid);
int ndim = U[0]._grid->_ndimension;
Wl = zero;
for (int nu = 0; nu < ndim - 1; nu++) {
traceWilsonLoop(siteWl, U, R1, R2, ndim-1, nu);
Wl = Wl + siteWl;
}
}
//////////////////////////////////////////////////
// sum Wilson loop over all planes orthogonal to the time direction
//////////////////////////////////////////////////
static void siteSpatialWilsonLoop(LatticeComplex &Wl,
const std::vector<GaugeMat> &U,
const int R1, const int R2) {
LatticeComplex siteWl(U[0]._grid);
Wl = zero;
for (int mu = 1; mu < U[0]._grid->_ndimension - 1; mu++) {
for (int nu = 0; nu < mu; nu++) {
traceWilsonLoop(siteWl, U, R1, R2, mu, nu);
Wl = Wl + siteWl;
traceWilsonLoop(siteWl, U, R2, R1, mu, nu);
Wl = Wl + siteWl;
}
}
}
//////////////////////////////////////////////////
// sum over all x,y,z,t and over all planes of Wilson loop
//////////////////////////////////////////////////
static Real sumWilsonLoop(const GaugeLorentz &Umu,
const int R1, const int R2) {
std::vector<GaugeMat> U(4, Umu._grid);
for (int mu = 0; mu < Umu._grid->_ndimension; mu++) {
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
}
LatticeComplex Wl(Umu._grid);
siteWilsonLoop(Wl, U, R1, R2);
TComplex Tp = sum(Wl);
Complex p = TensorRemove(Tp);
return p.real();
}
//////////////////////////////////////////////////
// sum over all x,y,z,t and over all planes of timelike Wilson loop
//////////////////////////////////////////////////
static Real sumTimelikeWilsonLoop(const GaugeLorentz &Umu,
const int R1, const int R2) {
std::vector<GaugeMat> U(4, Umu._grid);
for (int mu = 0; mu < Umu._grid->_ndimension; mu++) {
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
}
LatticeComplex Wl(Umu._grid);
siteTimelikeWilsonLoop(Wl, U, R1, R2);
TComplex Tp = sum(Wl);
Complex p = TensorRemove(Tp);
return p.real();
}
//////////////////////////////////////////////////
// sum over all x,y,z,t and over all planes of spatial Wilson loop
//////////////////////////////////////////////////
static Real sumSpatialWilsonLoop(const GaugeLorentz &Umu,
const int R1, const int R2) {
std::vector<GaugeMat> U(4, Umu._grid);
for (int mu = 0; mu < Umu._grid->_ndimension; mu++) {
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
}
LatticeComplex Wl(Umu._grid);
siteSpatialWilsonLoop(Wl, U, R1, R2);
TComplex Tp = sum(Wl);
Complex p = TensorRemove(Tp);
return p.real();
}
//////////////////////////////////////////////////
// average over all x,y,z,t and over all planes of Wilson loop
//////////////////////////////////////////////////
static Real avgWilsonLoop(const GaugeLorentz &Umu,
const int R1, const int R2) {
int ndim = Umu._grid->_ndimension;
Real sumWl = sumWilsonLoop(Umu, R1, R2);
Real vol = Umu._grid->gSites();
Real faces = 1.0 * ndim * (ndim - 1);
return sumWl / vol / faces / Nc; // Nc dependent... FIXME
}
//////////////////////////////////////////////////
// average over all x,y,z,t and over all planes of timelike Wilson loop
//////////////////////////////////////////////////
static Real avgTimelikeWilsonLoop(const GaugeLorentz &Umu,
const int R1, const int R2) {
int ndim = Umu._grid->_ndimension;
Real sumWl = sumTimelikeWilsonLoop(Umu, R1, R2);
Real vol = Umu._grid->gSites();
Real faces = 1.0 * (ndim - 1);
return sumWl / vol / faces / Nc; // Nc dependent... FIXME
}
//////////////////////////////////////////////////
// average over all x,y,z,t and over all planes of spatial Wilson loop
//////////////////////////////////////////////////
static Real avgSpatialWilsonLoop(const GaugeLorentz &Umu,
const int R1, const int R2) {
int ndim = Umu._grid->_ndimension;
Real sumWl = sumSpatialWilsonLoop(Umu, R1, R2);
Real vol = Umu._grid->gSites();
Real faces = 1.0 * (ndim - 1) * (ndim - 2);
return sumWl / vol / faces / Nc; // Nc dependent... FIXME
}
};
END_QEDFVOL_NAMESPACE
#endif // QEDFVOL_WILSONLOOPS_H
+88
View File
@@ -0,0 +1,88 @@
#include <Global.hpp>
#include <WilsonLoops.h>
using namespace Grid;
using namespace QCD;
using namespace QedFVol;
typedef PeriodicGaugeImpl<QedGimplR> QedPeriodicGimplR;
typedef PhotonR::GaugeField EmField;
typedef PhotonR::GaugeLinkField EmComp;
const int NCONFIGS = 10;
const int NWILSON = 10;
int main(int argc, char *argv[])
{
// parse command line
std::string parameterFileName;
if (argc < 2)
{
std::cerr << "usage: " << argv[0] << " <parameter file> [Grid options]";
std::cerr << std::endl;
std::exit(EXIT_FAILURE);
}
parameterFileName = argv[1];
// initialization
Grid_init(&argc, &argv);
QedFVolLogError.Active(GridLogError.isActive());
QedFVolLogWarning.Active(GridLogWarning.isActive());
QedFVolLogMessage.Active(GridLogMessage.isActive());
QedFVolLogIterative.Active(GridLogIterative.isActive());
QedFVolLogDebug.Active(GridLogDebug.isActive());
LOG(Message) << "Grid initialized" << std::endl;
// QED stuff
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(4, vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian grid(latt_size,simd_layout,mpi_layout);
GridParallelRNG pRNG(&grid);
PhotonR photon(PhotonR::Gauge::feynman,
PhotonR::ZmScheme::qedL);
EmField a(&grid);
EmField expA(&grid);
Complex imag_unit(0, 1);
Real wlA;
std::vector<Real> logWlAvg(NWILSON, 0.0), logWlTime(NWILSON, 0.0), logWlSpace(NWILSON, 0.0);
pRNG.SeedRandomDevice();
LOG(Message) << "Wilson loop calculation beginning" << std::endl;
for(int ic = 0; ic < NCONFIGS; ic++){
LOG(Message) << "Configuration " << ic <<std::endl;
photon.StochasticField(a, pRNG);
// Exponentiate photon field
expA = exp(imag_unit*a);
// Calculate Wilson loops
for(int iw=1; iw<=NWILSON; iw++){
wlA = NewWilsonLoops<QedPeriodicGimplR>::avgWilsonLoop(expA, iw, iw) * 3;
logWlAvg[iw-1] -= 2*log(wlA);
wlA = NewWilsonLoops<QedPeriodicGimplR>::avgTimelikeWilsonLoop(expA, iw, iw) * 3;
logWlTime[iw-1] -= 2*log(wlA);
wlA = NewWilsonLoops<QedPeriodicGimplR>::avgSpatialWilsonLoop(expA, iw, iw) * 3;
logWlSpace[iw-1] -= 2*log(wlA);
}
}
LOG(Message) << "Wilson loop calculation completed" << std::endl;
// Calculate Wilson loops
for(int iw=1; iw<=10; iw++){
LOG(Message) << iw << 'x' << iw << " Wilson loop" << std::endl;
LOG(Message) << "-2log(W) average: " << logWlAvg[iw-1]/NCONFIGS << std::endl;
LOG(Message) << "-2log(W) timelike: " << logWlTime[iw-1]/NCONFIGS << std::endl;
LOG(Message) << "-2log(W) spatial: " << logWlSpace[iw-1]/NCONFIGS << std::endl;
}
// epilogue
LOG(Message) << "Grid is finalizing now" << std::endl;
Grid_finalize();
return EXIT_SUCCESS;
}
+14 -1
View File
@@ -20,4 +20,17 @@ The simple testcase in this directory is the submitted bug report that encapsula
problem. The test case works with icpc and with clang++, but fails consistently on g++
current variants.
Peter
Peter
************
Second GCC bug reported, see Issue 100.
https://wandbox.org/permlink/tzssJza6R9XnqANw
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80652
Getting Travis fails under gcc-5 for Test_simd, now that I added more comprehensive testing to the
CI test suite. The limitations of Travis runtime limits & weak cores are being shown.
Travis uses 5.4.1 for g++-5.
Executable
+86
View File
@@ -0,0 +1,86 @@
#! /bin/sh
prefix=@prefix@
exec_prefix=@exec_prefix@
includedir=@includedir@
usage()
{
cat <<EOF
Usage: grid-config [OPTION]
Known values for OPTION are:
--prefix show Grid installation prefix
--cxxflags print pre-processor and compiler flags
--ldflags print library linking flags
--libs print library linking information
--summary print full build summary
--help display this help and exit
--version output version information
--git print git revision
EOF
exit $1
}
if test $# -eq 0; then
usage 1
fi
cflags=false
libs=false
while test $# -gt 0; do
case "$1" in
-*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
*) optarg= ;;
esac
case "$1" in
--prefix)
echo $prefix
;;
--version)
echo @VERSION@
exit 0
;;
--git)
echo "@GRID_BRANCH@ @GRID_SHA@"
exit 0
;;
--help)
usage 0
;;
--cxxflags)
echo @GRID_CXXFLAGS@
;;
--ldflags)
echo @GRID_LDFLAGS@
;;
--libs)
echo @GRID_LIBS@
;;
--summary)
echo ""
echo "@GRID_SUMMARY@"
echo ""
;;
*)
usage
exit 1
;;
esac
shift
done
exit 0
-65
View File
@@ -1,65 +0,0 @@
#include <Grid/Grid.h>
namespace Grid {
int PointerCache::victim;
PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
void *PointerCache::Insert(void *ptr,size_t bytes) {
if (bytes < 4096 ) return NULL;
#ifdef _OPENMP
assert(omp_in_parallel()==0);
#endif
void * ret = NULL;
int v = -1;
for(int e=0;e<Ncache;e++) {
if ( Entries[e].valid==0 ) {
v=e;
break;
}
}
if ( v==-1 ) {
v=victim;
victim = (victim+1)%Ncache;
}
if ( Entries[v].valid ) {
ret = Entries[v].address;
Entries[v].valid = 0;
Entries[v].address = NULL;
Entries[v].bytes = 0;
}
Entries[v].address=ptr;
Entries[v].bytes =bytes;
Entries[v].valid =1;
return ret;
}
void *PointerCache::Lookup(size_t bytes) {
if (bytes < 4096 ) return NULL;
#ifdef _OPENMP
assert(omp_in_parallel()==0);
#endif
for(int e=0;e<Ncache;e++){
if ( Entries[e].valid && ( Entries[e].bytes == bytes ) ) {
Entries[e].valid = 0;
return Entries[e].address;
}
}
return NULL;
}
}
+37
View File
@@ -0,0 +1,37 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/DisableWarnings.h
Copyright (C) 2016
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef DISABLE_WARNINGS_H
#define DISABLE_WARNINGS_H
//disables and intel compiler specific warning (in json.hpp)
#pragma warning disable 488
#endif
+7 -47
View File
@@ -38,52 +38,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_H
#define GRID_H
///////////////////
// Std C++ dependencies
///////////////////
#include <cassert>
#include <complex>
#include <vector>
#include <iostream>
#include <iomanip>
#include <random>
#include <functional>
#include <stdio.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <ctime>
#include <sys/time.h>
#include <chrono>
///////////////////
// Grid headers
///////////////////
#include "Config.h"
#include <Grid/Timer.h>
#include <Grid/PerfCount.h>
#include <Grid/Log.h>
#include <Grid/AlignedAllocator.h>
#include <Grid/Simd.h>
#include <Grid/serialisation/Serialisation.h>
#include <Grid/Threads.h>
#include <Grid/Lexicographic.h>
#include <Grid/Init.h>
#include <Grid/Communicator.h>
#include <Grid/Cartesian.h>
#include <Grid/Tensors.h>
#include <Grid/Lattice.h>
#include <Grid/Cshift.h>
#include <Grid/Stencil.h>
#include <Grid/Algorithms.h>
#include <Grid/parallelIO/BinaryIO.h>
#include <Grid/FFT.h>
#include <Grid/qcd/QCD.h>
#include <Grid/parallelIO/NerscIO.h>
#include <Grid/qcd/hmc/NerscCheckpointer.h>
#include <Grid/qcd/hmc/HmcRunner.h>
#include <Grid/GridCore.h>
#include <Grid/GridQCDcore.h>
#include <Grid/qcd/action/Action.h>
#include <Grid/qcd/utils/GaugeFix.h>
#include <Grid/qcd/smearing/Smearing.h>
#include <Grid/parallelIO/MetaData.h>
#include <Grid/qcd/hmc/HMC_aggregate.h>
#endif
@@ -2,11 +2,13 @@
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/MatrixUtils.h
Source file: ./lib/Grid.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: azusayamaguchi <ayamaguc@YAMAKAZE.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -25,51 +27,34 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MATRIX_UTILS_H
#define GRID_MATRIX_UTILS_H
//
// Grid.h
// simd
//
// Created by Peter Boyle on 09/05/2014.
// Copyright (c) 2014 University of Edinburgh. All rights reserved.
//
namespace Grid {
#ifndef GRID_BASE_H
#define GRID_BASE_H
namespace MatrixUtils {
#include <Grid/GridStd.h>
template<class T> inline void Size(Matrix<T>& A,int &N,int &M){
N=A.size(); assert(N>0);
M=A[0].size();
for(int i=0;i<N;i++){
assert(A[i].size()==M);
}
}
#include <Grid/perfmon/Timer.h>
#include <Grid/perfmon/PerfCount.h>
#include <Grid/log/Log.h>
#include <Grid/allocator/AlignedAllocator.h>
#include <Grid/simd/Simd.h>
#include <Grid/serialisation/Serialisation.h>
#include <Grid/threads/Threads.h>
#include <Grid/util/Util.h>
#include <Grid/communicator/Communicator.h>
#include <Grid/cartesian/Cartesian.h>
#include <Grid/tensors/Tensors.h>
#include <Grid/lattice/Lattice.h>
#include <Grid/cshift/Cshift.h>
#include <Grid/stencil/Stencil.h>
#include <Grid/parallelIO/BinaryIO.h>
#include <Grid/algorithms/Algorithms.h>
template<class T> inline void SizeSquare(Matrix<T>& A,int &N)
{
int M;
Size(A,N,M);
assert(N==M);
}
template<class T> inline void Fill(Matrix<T>& A,T & val)
{
int N,M;
Size(A,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
A[i][j]=val;
}}
}
template<class T> inline void Diagonal(Matrix<T>& A,T & val)
{
int N;
SizeSquare(A,N);
for(int i=0;i<N;i++){
A[i][i]=val;
}
}
template<class T> inline void Identity(Matrix<T>& A)
{
Fill(A,0.0);
Diagonal(A,1.0);
}
};
}
#endif
+13 -7
View File
@@ -2,12 +2,12 @@
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/hmc/HMC.cc
Source file: ./lib/Grid.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
Author: azusayamaguchi <ayamaguc@YAMAKAZE.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
@@ -27,10 +27,16 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#ifndef GRID_QCD_CORE_H
#define GRID_QCD_CORE_H
namespace Grid{
namespace QCD{
/////////////////////////
// Core Grid QCD headers
/////////////////////////
#include <Grid/GridCore.h>
#include <Grid/qcd/QCD.h>
#include <Grid/qcd/spin/Spin.h>
#include <Grid/qcd/utils/Utils.h>
#include <Grid/qcd/representations/Representations.h>
}
}
#endif
+29
View File
@@ -0,0 +1,29 @@
#ifndef GRID_STD_H
#define GRID_STD_H
///////////////////
// Std C++ dependencies
///////////////////
#include <cassert>
#include <complex>
#include <vector>
#include <string>
#include <iostream>
#include <iomanip>
#include <random>
#include <functional>
#include <stdio.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <ctime>
#include <sys/time.h>
#include <chrono>
#include <zlib.h>
///////////////////
// Grid config
///////////////////
#include "Config.h"
#endif /* GRID_STD_H */
+9
View File
@@ -0,0 +1,9 @@
#pragma once
#if defined __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
#include <Grid/Eigen/Dense>
#if defined __GNUC__
#pragma GCC diagnostic pop
#endif
+2 -2
View File
@@ -10,8 +10,8 @@ if BUILD_COMMS_MPI3
extra_sources+=communicator/Communicator_base.cc
endif
if BUILD_COMMS_MPI3L
extra_sources+=communicator/Communicator_mpi3_leader.cc
if BUILD_COMMS_MPIT
extra_sources+=communicator/Communicator_mpit.cc
extra_sources+=communicator/Communicator_base.cc
endif
Binary file not shown.
-154
View File
@@ -1,154 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/Old/Tensor_peek.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MATH_PEEK_H
#define GRID_MATH_PEEK_H
namespace Grid {
//////////////////////////////////////////////////////////////////////////////
// Peek on a specific index; returns a scalar in that index, tensor inherits rest
//////////////////////////////////////////////////////////////////////////////
// If we hit the right index, return scalar with no further recursion
//template<int Level> inline ComplexF peekIndex(const ComplexF arg) { return arg;}
//template<int Level> inline ComplexD peekIndex(const ComplexD arg) { return arg;}
//template<int Level> inline RealF peekIndex(const RealF arg) { return arg;}
//template<int Level> inline RealD peekIndex(const RealD arg) { return arg;}
#if 0
// Scalar peek, no indices
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
auto peekIndex(const iScalar<vtype> &arg) -> iScalar<vtype>
{
return arg;
}
// Vector peek, one index
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
auto peekIndex(const iVector<vtype,N> &arg,int i) -> iScalar<vtype> // Index matches
{
iScalar<vtype> ret; // return scalar
ret._internal = arg._internal[i];
return ret;
}
// Matrix peek, two indices
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
auto peekIndex(const iMatrix<vtype,N> &arg,int i,int j) -> iScalar<vtype>
{
iScalar<vtype> ret; // return scalar
ret._internal = arg._internal[i][j];
return ret;
}
/////////////
// No match peek for scalar,vector,matrix must forward on either 0,1,2 args. Must have 9 routines with notvalue
/////////////
// scalar
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iScalar<vtype> &arg) -> iScalar<decltype(peekIndex<Level>(arg._internal))>
{
iScalar<decltype(peekIndex<Level>(arg._internal))> ret;
ret._internal= peekIndex<Level>(arg._internal);
return ret;
}
template<int Level,class vtype, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iScalar<vtype> &arg,int i) -> iScalar<decltype(peekIndex<Level>(arg._internal,i))>
{
iScalar<decltype(peekIndex<Level>(arg._internal,i))> ret;
ret._internal=peekIndex<Level>(arg._internal,i);
return ret;
}
template<int Level,class vtype, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iScalar<vtype> &arg,int i,int j) -> iScalar<decltype(peekIndex<Level>(arg._internal,i,j))>
{
iScalar<decltype(peekIndex<Level>(arg._internal,i,j))> ret;
ret._internal=peekIndex<Level>(arg._internal,i,j);
return ret;
}
// vector
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iVector<vtype,N> &arg) -> iVector<decltype(peekIndex<Level>(arg._internal[0])),N>
{
iVector<decltype(peekIndex<Level>(arg._internal[0])),N> ret;
for(int ii=0;ii<N;ii++){
ret._internal[ii]=peekIndex<Level>(arg._internal[ii]);
}
return ret;
}
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iVector<vtype,N> &arg,int i) -> iVector<decltype(peekIndex<Level>(arg._internal[0],i)),N>
{
iVector<decltype(peekIndex<Level>(arg._internal[0],i)),N> ret;
for(int ii=0;ii<N;ii++){
ret._internal[ii]=peekIndex<Level>(arg._internal[ii],i);
}
return ret;
}
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iVector<vtype,N> &arg,int i,int j) -> iVector<decltype(peekIndex<Level>(arg._internal[0],i,j)),N>
{
iVector<decltype(peekIndex<Level>(arg._internal[0],i,j)),N> ret;
for(int ii=0;ii<N;ii++){
ret._internal[ii]=peekIndex<Level>(arg._internal[ii],i,j);
}
return ret;
}
// matrix
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iMatrix<vtype,N> &arg) -> iMatrix<decltype(peekIndex<Level>(arg._internal[0][0])),N>
{
iMatrix<decltype(peekIndex<Level>(arg._internal[0][0])),N> ret;
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
ret._internal[ii][jj]=peekIndex<Level>(arg._internal[ii][jj]);// Could avoid this because peeking a scalar is dumb
}}
return ret;
}
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iMatrix<vtype,N> &arg,int i) -> iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i)),N>
{
iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i)),N> ret;
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
ret._internal[ii][jj]=peekIndex<Level>(arg._internal[ii][jj],i);
}}
return ret;
}
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iMatrix<vtype,N> &arg,int i,int j) -> iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i,j)),N>
{
iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i,j)),N> ret;
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
ret._internal[ii][jj]=peekIndex<Level>(arg._internal[ii][jj],i,j);
}}
return ret;
}
#endif
}
#endif
-127
View File
@@ -1,127 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/Old/Tensor_poke.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MATH_POKE_H
#define GRID_MATH_POKE_H
namespace Grid {
//////////////////////////////////////////////////////////////////////////////
// Poke a specific index;
//////////////////////////////////////////////////////////////////////////////
#if 0
// Scalar poke
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
void pokeIndex(iScalar<vtype> &ret, const iScalar<vtype> &arg)
{
ret._internal = arg._internal;
}
// Vector poke, one index
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
void pokeIndex(iVector<vtype,N> &ret, const iScalar<vtype> &arg,int i)
{
ret._internal[i] = arg._internal;
}
//Matrix poke, two indices
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
void pokeIndex(iMatrix<vtype,N> &ret, const iScalar<vtype> &arg,int i,int j)
{
ret._internal[i][j] = arg._internal;
}
/////////////
// No match poke for scalar,vector,matrix must forward on either 0,1,2 args. Must have 9 routines with notvalue
/////////////
// scalar
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(peekIndex<Level>(ret._internal))> &arg)
{
pokeIndex<Level>(ret._internal,arg._internal);
}
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(peekIndex<Level>(ret._internal,0))> &arg, int i)
{
pokeIndex<Level>(ret._internal,arg._internal,i);
}
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(peekIndex<Level>(ret._internal,0,0))> &arg,int i,int j)
{
pokeIndex<Level>(ret._internal,arg._internal,i,j);
}
// Vector
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iVector<vtype,N> &ret, iVector<decltype(peekIndex<Level>(ret._internal)),N> &arg)
{
for(int ii=0;ii<N;ii++){
pokeIndex<Level>(ret._internal[ii],arg._internal[ii]);
}
}
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(peekIndex<Level>(ret._internal,0)),N> &arg,int i)
{
for(int ii=0;ii<N;ii++){
pokeIndex<Level>(ret._internal[ii],arg._internal[ii],i);
}
}
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(peekIndex<Level>(ret._internal,0,0)),N> &arg,int i,int j)
{
for(int ii=0;ii<N;ii++){
pokeIndex<Level>(ret._internal[ii],arg._internal[ii],i,j);
}
}
// Matrix
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(peekIndex<Level>(ret._internal)),N> &arg)
{
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
pokeIndex<Level>(ret._internal[ii][jj],arg._internal[ii][jj]);
}}
}
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(peekIndex<Level>(ret._internal,0)),N> &arg,int i)
{
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
pokeIndex<Level>(ret._internal[ii][jj],arg._internal[ii][jj],i);
}}
}
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(peekIndex<Level>(ret._internal,0,0)),N> &arg, int i,int j)
{
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
pokeIndex<Level>(ret._internal[ii][jj],arg._internal[ii][jj],i,j);
}}
}
#endif
}
#endif
@@ -1,6 +1,6 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/Algorithms.h
@@ -37,38 +37,24 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/approx/Chebyshev.h>
#include <Grid/algorithms/approx/Remez.h>
#include <Grid/algorithms/approx/MultiShiftFunction.h>
#include <Grid/algorithms/approx/Forecast.h>
#include <Grid/algorithms/iterative/ConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateResidual.h>
#include <Grid/algorithms/iterative/NormalEquations.h>
#include <Grid/algorithms/iterative/SchurRedBlack.h>
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
// Lanczos support
#include <Grid/algorithms/iterative/MatrixUtils.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/CoarsenedMatrix.h>
#include <Grid/algorithms/FFT.h>
// Eigen/lanczos
// EigCg
// MCR
// Pcg
// Multishift CG
// Hdcg
// GCR
// etc..
// integrator/Leapfrog
// integrator/Omelyan
// integrator/ForceGradient
// montecarlo/hmc
// montecarlo/rhmc
// montecarlo/metropolis
// etc...
#endif
+3 -5
View File
@@ -267,8 +267,7 @@ namespace Grid {
SimpleCompressor<siteVector> compressor;
Stencil.HaloExchange(in,compressor);
PARALLEL_FOR_LOOP
for(int ss=0;ss<Grid()->oSites();ss++){
parallel_for(int ss=0;ss<Grid()->oSites();ss++){
siteVector res = zero;
siteVector nbr;
int ptype;
@@ -380,8 +379,7 @@ PARALLEL_FOR_LOOP
Subspace.ProjectToSubspace(oProj,oblock);
// blockProject(iProj,iblock,Subspace.subspace);
// blockProject(oProj,oblock,Subspace.subspace);
PARALLEL_FOR_LOOP
for(int ss=0;ss<Grid()->oSites();ss++){
parallel_for(int ss=0;ss<Grid()->oSites();ss++){
for(int j=0;j<nbasis;j++){
if( disp!= 0 ) {
A[p]._odata[ss](j,i) = oProj._odata[ss](j);
@@ -427,7 +425,7 @@ PARALLEL_FOR_LOOP
A[p]=zero;
}
GridParallelRNG RNG(Grid()); RNG.SeedRandomDevice();
GridParallelRNG RNG(Grid()); RNG.SeedFixedIntegers(std::vector<int>({55,72,19,17,34}));
Lattice<iScalar<CComplex> > val(Grid()); random(RNG,val);
Complex one(1.0);
+3 -2
View File
@@ -230,6 +230,7 @@ namespace Grid {
// Barrel shift and collect global pencil
std::vector<int> lcoor(Nd), gcoor(Nd);
result = source;
int pc = processor_coor[dim];
for(int p=0;p<processors[dim];p++) {
PARALLEL_REGION
{
@@ -240,7 +241,8 @@ namespace Grid {
for(int idx=0;idx<sgrid->lSites();idx++) {
sgrid->LocalIndexToLocalCoor(idx,cbuf);
peekLocalSite(s,result,cbuf);
cbuf[dim]+=p*L;
cbuf[dim]+=((pc+p) % processors[dim])*L;
// cbuf[dim]+=p*L;
pokeLocalSite(s,pgbuf,cbuf);
}
}
@@ -278,7 +280,6 @@ namespace Grid {
flops+= flops_call*NN;
// writing out result
int pc = processor_coor[dim];
PARALLEL_REGION
{
std::vector<int> clbuf(Nd), cgbuf(Nd);
+38 -10
View File
@@ -162,15 +162,10 @@ namespace Grid {
_Mat.M(in,out);
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
ComplexD dot;
_Mat.M(in,out);
dot= innerProduct(in,out);
n1=real(dot);
dot = innerProduct(out,out);
n2=real(dot);
ComplexD dot= innerProduct(in,out); n1=real(dot);
n2=norm2(out);
}
void HermOp(const Field &in, Field &out){
_Mat.M(in,out);
@@ -192,10 +187,10 @@ namespace Grid {
ni=Mpc(in,tmp);
no=MpcDag(tmp,out);
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
MpcDagMpc(in,out,n1,n2);
}
void HermOp(const Field &in, Field &out){
virtual void HermOp(const Field &in, Field &out){
RealD n1,n2;
HermOpAndNorm(in,out,n1,n2);
}
@@ -235,7 +230,7 @@ namespace Grid {
Field tmp(in._grid);
_Mat.MeooeDag(in,tmp);
_Mat.MooeeInvDag(tmp,out);
_Mat.MooeeInvDag(tmp,out);
_Mat.MeooeDag(out,tmp);
_Mat.MooeeDag(in,out);
@@ -300,6 +295,39 @@ namespace Grid {
}
};
//
template<class Matrix,class Field>
class SchurStaggeredOperator : public SchurOperatorBase<Field> {
protected:
Matrix &_Mat;
public:
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){};
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
ComplexD dot;
n2 = Mpc(in,out);
dot= innerProduct(in,out);
n1 = real(dot);
}
virtual void HermOp(const Field &in, Field &out){
Mpc(in,out);
}
virtual RealD Mpc (const Field &in, Field &out) {
Field tmp(in._grid);
_Mat.Meooe(in,tmp);
_Mat.MooeeInv(tmp,out);
_Mat.MeooeDag(out,tmp);
_Mat.Mooee(in,out);
return axpy_norm(out,-1.0,tmp,out);
}
virtual RealD MpcDag (const Field &in, Field &out){
return Mpc(in,out);
}
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
assert(0);// Never need with staggered
}
};
template<class Matrix,class Field> using SchurStagOperator = SchurStaggeredOperator<Matrix,Field>;
/////////////////////////////////////////////////////////////
// Base classes for functions of operators
View File
+3 -2
View File
@@ -197,8 +197,9 @@ namespace Grid {
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
GridBase *grid=in._grid;
//std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl;
//<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
// std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl;
//std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
int vol=grid->gSites();
+152
View File
@@ -0,0 +1,152 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/approx/Forecast.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef INCLUDED_FORECAST_H
#define INCLUDED_FORECAST_H
namespace Grid {
// Abstract base class.
// Takes a matrix (Mat), a source (phi), and a vector of Fields (chi)
// and returns a forecasted solution to the system D*psi = phi (psi).
template<class Matrix, class Field>
class Forecast
{
public:
virtual Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& chi) = 0;
};
// Implementation of Brower et al.'s chronological inverter (arXiv:hep-lat/9509012),
// used to forecast solutions across poles of the EOFA heatbath.
//
// Modified from CPS (cps_pp/src/util/dirac_op/d_op_base/comsrc/minresext.C)
template<class Matrix, class Field>
class ChronoForecast : public Forecast<Matrix,Field>
{
public:
Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& prev_solns)
{
int degree = prev_solns.size();
Field chi(phi); // forecasted solution
// Trivial cases
if(degree == 0){ chi = zero; return chi; }
else if(degree == 1){ return prev_solns[0]; }
RealD dot;
ComplexD xp;
Field r(phi); // residual
Field Mv(phi);
std::vector<Field> v(prev_solns); // orthonormalized previous solutions
std::vector<Field> MdagMv(degree,phi);
// Array to hold the matrix elements
std::vector<std::vector<ComplexD>> G(degree, std::vector<ComplexD>(degree));
// Solution and source vectors
std::vector<ComplexD> a(degree);
std::vector<ComplexD> b(degree);
// Orthonormalize the vector basis
for(int i=0; i<degree; i++){
v[i] *= 1.0/std::sqrt(norm2(v[i]));
for(int j=i+1; j<degree; j++){ v[j] -= innerProduct(v[i],v[j]) * v[i]; }
}
// Perform sparse matrix multiplication and construct rhs
for(int i=0; i<degree; i++){
b[i] = innerProduct(v[i],phi);
Mat.M(v[i],Mv);
Mat.Mdag(Mv,MdagMv[i]);
G[i][i] = innerProduct(v[i],MdagMv[i]);
}
// Construct the matrix
for(int j=0; j<degree; j++){
for(int k=j+1; k<degree; k++){
G[j][k] = innerProduct(v[j],MdagMv[k]);
G[k][j] = std::conj(G[j][k]);
}}
// Gauss-Jordan elimination with partial pivoting
for(int i=0; i<degree; i++){
// Perform partial pivoting
int k = i;
for(int j=i+1; j<degree; j++){ if(std::abs(G[j][j]) > std::abs(G[k][k])){ k = j; } }
if(k != i){
xp = b[k];
b[k] = b[i];
b[i] = xp;
for(int j=0; j<degree; j++){
xp = G[k][j];
G[k][j] = G[i][j];
G[i][j] = xp;
}
}
// Convert matrix to upper triangular form
for(int j=i+1; j<degree; j++){
xp = G[j][i]/G[i][i];
b[j] -= xp * b[i];
for(int k=0; k<degree; k++){ G[j][k] -= xp*G[i][k]; }
}
}
// Use Gaussian elimination to solve equations and calculate initial guess
chi = zero;
r = phi;
for(int i=degree-1; i>=0; i--){
a[i] = 0.0;
for(int j=i+1; j<degree; j++){ a[i] += G[i][j] * a[j]; }
a[i] = (b[i]-a[i])/G[i][i];
chi += a[i]*v[i];
r -= a[i]*MdagMv[i];
}
RealD true_r(0.0);
ComplexD tmp;
for(int i=0; i<degree; i++){
tmp = -b[i];
for(int j=0; j<degree; j++){ tmp += G[i][j]*a[j]; }
tmp = std::conj(tmp)*tmp;
true_r += std::sqrt(tmp.real());
}
RealD error = std::sqrt(norm2(r)/norm2(phi));
std::cout << GridLogMessage << "ChronoForecast: |res|/|src| = " << error << std::endl;
return chi;
};
};
}
#endif
+1 -1
View File
@@ -25,7 +25,7 @@ Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/GridCore.h>
namespace Grid {
double MultiShiftFunction::approx(double x)
+1 -1
View File
@@ -16,7 +16,7 @@
#define INCLUDED_ALG_REMEZ_H
#include <stddef.h>
#include <Config.h>
#include <Grid/GridStd.h>
#ifdef HAVE_LIBGMP
#include "bigfloat.h"
@@ -0,0 +1,606 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h
Copyright (C) 2017
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H
#define GRID_BLOCK_CONJUGATE_GRADIENT_H
namespace Grid {
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS };
//////////////////////////////////////////////////////////////////////////
// Block conjugate gradient. Dimension zero should be the block direction
//////////////////////////////////////////////////////////////////////////
template <class Field>
class BlockConjugateGradient : public OperatorFunction<Field> {
public:
typedef typename Field::scalar_type scomplex;
int blockDim ;
int Nblock;
BlockCGtype CGtype;
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
// Defaults true.
RealD Tolerance;
Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
: Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv)
{};
////////////////////////////////////////////////////////////////////////////////////////////////////
// Thin QR factorisation (google it)
////////////////////////////////////////////////////////////////////////////////////////////////////
void ThinQRfact (Eigen::MatrixXcd &m_rr,
Eigen::MatrixXcd &C,
Eigen::MatrixXcd &Cinv,
Field & Q,
const Field & R)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
////////////////////////////////////////////////////////////////////////////////////////////////////
//Dimensions
// R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock
//
// Rdag R = m_rr = Herm = L L^dag <-- Cholesky decomposition (LLT routine in Eigen)
//
// Q C = R => Q = R C^{-1}
//
// Want Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock}
//
// Set C = L^{dag}, and then Q^dag Q = ident
//
// Checks:
// Cdag C = Rdag R ; passes.
// QdagQ = 1 ; passes
////////////////////////////////////////////////////////////////////////////////////////////////////
sliceInnerProductMatrix(m_rr,R,R,Orthog);
// Force manifest hermitian to avoid rounding related
m_rr = 0.5*(m_rr+m_rr.adjoint());
#if 0
std::cout << " Calling Cholesky ldlt on m_rr " << m_rr <<std::endl;
Eigen::MatrixXcd L_ldlt = m_rr.ldlt().matrixL();
std::cout << " Called Cholesky ldlt on m_rr " << L_ldlt <<std::endl;
auto D_ldlt = m_rr.ldlt().vectorD();
std::cout << " Called Cholesky ldlt on m_rr " << D_ldlt <<std::endl;
#endif
// std::cout << " Calling Cholesky llt on m_rr " <<std::endl;
Eigen::MatrixXcd L = m_rr.llt().matrixL();
// std::cout << " Called Cholesky llt on m_rr " << L <<std::endl;
C = L.adjoint();
Cinv = C.inverse();
////////////////////////////////////////////////////////////////////////////////////////////////////
// Q = R C^{-1}
//
// Q_j = R_i Cinv(i,j)
//
// NB maddMatrix conventions are Right multiplication X[j] a[j,i] already
////////////////////////////////////////////////////////////////////////////////////////////////////
sliceMulMatrix(Q,Cinv,R,Orthog);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Call one of several implementations
////////////////////////////////////////////////////////////////////////////////////////////////////
void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
if ( CGtype == BlockCGrQ ) {
BlockCGrQsolve(Linop,Src,Psi);
} else if (CGtype == BlockCG ) {
BlockCGsolve(Linop,Src,Psi);
} else if (CGtype == CGmultiRHS ) {
CGmultiRHSsolve(Linop,Src,Psi);
} else {
assert(0);
}
}
////////////////////////////////////////////////////////////////////////////
// BlockCGrQ implementation:
//--------------------------
// X is guess/Solution
// B is RHS
// Solve A X_i = B_i ; i refers to Nblock index
////////////////////////////////////////////////////////////////////////////
void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
Nblock = B._grid->_fdimensions[Orthog];
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
X.checkerboard = B.checkerboard;
conformable(X, B);
Field tmp(B);
Field Q(B);
Field D(B);
Field Z(B);
Field AD(B);
Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
// Initial residual computation & set up
std::vector<RealD> residuals(Nblock);
std::vector<RealD> ssq(Nblock);
sliceNorm(ssq,B,Orthog);
RealD sssum=0;
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
sliceNorm(residuals,B,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
sliceNorm(residuals,X,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
/************************************************************************
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
************************************************************************
* Dimensions:
*
* X,B==(Nferm x Nblock)
* A==(Nferm x Nferm)
*
* Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
*
* QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
* for k:
* Z = AD
* M = [D^dag Z]^{-1}
* X = X + D MC
* QS = Q - ZM
* D = Q + D S^dag
* C = S C
*/
///////////////////////////////////////
// Initial block: initial search dir is guess
///////////////////////////////////////
std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl;
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
Linop.HermOp(X, AD);
tmp = B - AD;
//std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl;
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
//std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl;
//std::cout << GridLogMessage << " m_rr " << m_rr<<std::endl;
//std::cout << GridLogMessage << " m_C " << m_C<<std::endl;
//std::cout << GridLogMessage << " m_Cinv " << m_Cinv<<std::endl;
D=Q;
std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;
///////////////////////////////////////
// Timers
///////////////////////////////////////
GridStopWatch sliceInnerTimer;
GridStopWatch sliceMaddTimer;
GridStopWatch QRTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
//3. Z = AD
MatrixTimer.Start();
Linop.HermOp(D, Z);
MatrixTimer.Stop();
//std::cout << GridLogMessage << " norm2 Z " <<norm2(Z)<<std::endl;
//4. M = [D^dag Z]^{-1}
sliceInnerTimer.Start();
sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
sliceInnerTimer.Stop();
m_M = m_DZ.inverse();
//std::cout << GridLogMessage << " m_DZ " <<m_DZ<<std::endl;
//5. X = X + D MC
m_tmp = m_M * m_C;
sliceMaddTimer.Start();
sliceMaddMatrix(X,m_tmp, D,X,Orthog);
sliceMaddTimer.Stop();
//6. QS = Q - ZM
sliceMaddTimer.Start();
sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0);
sliceMaddTimer.Stop();
QRTimer.Start();
ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
QRTimer.Stop();
//7. D = Q + D S^dag
m_tmp = m_S.adjoint();
sliceMaddTimer.Start();
sliceMaddMatrix(D,m_tmp,D,Q,Orthog);
sliceMaddTimer.Stop();
//8. C = S C
m_C = m_S*m_C;
/*********************
* convergence monitor
*********************
*/
m_rr = m_C.adjoint() * m_C;
RealD max_resid=0;
RealD rrsum=0;
RealD rr;
for(int b=0;b<Nblock;b++) {
rrsum+=real(m_rr(b,b));
rr = real(m_rr(b,b))/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
<<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl;
if ( max_resid < Tolerance*Tolerance ) {
SolverTimer.Stop();
std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
Linop.HermOp(X, AD);
AD = AD-B;
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
//////////////////////////////////////////////////////////////////////////
// Block conjugate gradient; Original O'Leary Dimension zero should be the block direction
//////////////////////////////////////////////////////////////////////////
void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
Nblock = Src._grid->_fdimensions[Orthog];
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
Psi.checkerboard = Src.checkerboard;
conformable(Psi, Src);
Field P(Src);
Field AP(Src);
Field R(Src);
Eigen::MatrixXcd m_pAp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_pAp_inv= Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_rr_inv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_alpha = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_beta = Eigen::MatrixXcd::Zero(Nblock,Nblock);
// Initial residual computation & set up
std::vector<RealD> residuals(Nblock);
std::vector<RealD> ssq(Nblock);
sliceNorm(ssq,Src,Orthog);
RealD sssum=0;
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
sliceNorm(residuals,Src,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
sliceNorm(residuals,Psi,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
// Initial search dir is guess
Linop.HermOp(Psi, AP);
/************************************************************************
* Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980)
************************************************************************
* O'Leary : R = B - A X
* O'Leary : P = M R ; preconditioner M = 1
* O'Leary : alpha = PAP^{-1} RMR
* O'Leary : beta = RMR^{-1}_old RMR_new
* O'Leary : X=X+Palpha
* O'Leary : R_new=R_old-AP alpha
* O'Leary : P=MR_new+P beta
*/
R = Src - AP;
P = R;
sliceInnerProductMatrix(m_rr,R,R,Orthog);
GridStopWatch sliceInnerTimer;
GridStopWatch sliceMaddTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
RealD rrsum=0;
for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b));
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
<<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
MatrixTimer.Start();
Linop.HermOp(P, AP);
MatrixTimer.Stop();
// Alpha
sliceInnerTimer.Start();
sliceInnerProductMatrix(m_pAp,P,AP,Orthog);
sliceInnerTimer.Stop();
m_pAp_inv = m_pAp.inverse();
m_alpha = m_pAp_inv * m_rr ;
// Psi, R update
sliceMaddTimer.Start();
sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog); // add alpha * P to psi
sliceMaddMatrix(R ,m_alpha,AP, R,Orthog,-1.0);// sub alpha * AP to resid
sliceMaddTimer.Stop();
// Beta
m_rr_inv = m_rr.inverse();
sliceInnerTimer.Start();
sliceInnerProductMatrix(m_rr,R,R,Orthog);
sliceInnerTimer.Stop();
m_beta = m_rr_inv *m_rr;
// Search update
sliceMaddTimer.Start();
sliceMaddMatrix(AP,m_beta,P,R,Orthog);
sliceMaddTimer.Stop();
P= AP;
/*********************
* convergence monitor
*********************
*/
RealD max_resid=0;
RealD rr;
for(int b=0;b<Nblock;b++){
rr = real(m_rr(b,b))/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}
if ( max_resid < Tolerance*Tolerance ) {
SolverTimer.Stop();
std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
Linop.HermOp(Psi, AP);
AP = AP-Src;
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
//////////////////////////////////////////////////////////////////////////
// multiRHS conjugate gradient. Dimension zero should be the block direction
// Use this for spread out across nodes
//////////////////////////////////////////////////////////////////////////
void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
int Orthog = blockDim; // First dimension is block dim
Nblock = Src._grid->_fdimensions[Orthog];
std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
Psi.checkerboard = Src.checkerboard;
conformable(Psi, Src);
Field P(Src);
Field AP(Src);
Field R(Src);
std::vector<ComplexD> v_pAp(Nblock);
std::vector<RealD> v_rr (Nblock);
std::vector<RealD> v_rr_inv(Nblock);
std::vector<RealD> v_alpha(Nblock);
std::vector<RealD> v_beta(Nblock);
// Initial residual computation & set up
std::vector<RealD> residuals(Nblock);
std::vector<RealD> ssq(Nblock);
sliceNorm(ssq,Src,Orthog);
RealD sssum=0;
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
sliceNorm(residuals,Src,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
sliceNorm(residuals,Psi,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
// Initial search dir is guess
Linop.HermOp(Psi, AP);
R = Src - AP;
P = R;
sliceNorm(v_rr,R,Orthog);
GridStopWatch sliceInnerTimer;
GridStopWatch sliceMaddTimer;
GridStopWatch sliceNormTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
RealD rrsum=0;
for(int b=0;b<Nblock;b++) rrsum+=real(v_rr[b]);
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
<<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
MatrixTimer.Start();
Linop.HermOp(P, AP);
MatrixTimer.Stop();
// Alpha
sliceInnerTimer.Start();
sliceInnerProductVector(v_pAp,P,AP,Orthog);
sliceInnerTimer.Stop();
for(int b=0;b<Nblock;b++){
v_alpha[b] = v_rr[b]/real(v_pAp[b]);
}
// Psi, R update
sliceMaddTimer.Start();
sliceMaddVector(Psi,v_alpha, P,Psi,Orthog); // add alpha * P to psi
sliceMaddVector(R ,v_alpha,AP, R,Orthog,-1.0);// sub alpha * AP to resid
sliceMaddTimer.Stop();
// Beta
for(int b=0;b<Nblock;b++){
v_rr_inv[b] = 1.0/v_rr[b];
}
sliceNormTimer.Start();
sliceNorm(v_rr,R,Orthog);
sliceNormTimer.Stop();
for(int b=0;b<Nblock;b++){
v_beta[b] = v_rr_inv[b] *v_rr[b];
}
// Search update
sliceMaddTimer.Start();
sliceMaddVector(P,v_beta,P,R,Orthog);
sliceMaddTimer.Stop();
/*********************
* convergence monitor
*********************
*/
RealD max_resid=0;
for(int b=0;b<Nblock;b++){
RealD rr = v_rr[b]/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}
if ( max_resid < Tolerance*Tolerance ) {
SolverTimer.Stop();
std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
Linop.HermOp(Psi, AP);
AP = AP-Src;
std::cout <<GridLogMessage << "\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tNorm " << sliceNormTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "MultiRHSConjugateGradient did NOT converge" << std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
};
}
#endif
+25 -28
View File
@@ -52,8 +52,8 @@ class ConjugateGradient : public OperatorFunction<Field> {
MaxIterations(maxit),
ErrorOnNoConverge(err_on_no_conv){};
void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
Field &psi) {
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
@@ -78,18 +78,12 @@ class ConjugateGradient : public OperatorFunction<Field> {
cp = a;
ssq = norm2(src);
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: guess " << guess << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: src " << ssq << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: mp " << d << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: mmp " << b << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: cp,r " << cp << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: p " << a << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: guess " << guess << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: src " << ssq << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: mp " << d << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: mmp " << b << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: cp,r " << cp << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: p " << a << std::endl;
RealD rsq = Tolerance * Tolerance * ssq;
@@ -99,8 +93,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
}
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq
<< std::endl;
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
GridStopWatch LinalgTimer;
GridStopWatch MatrixTimer;
@@ -130,8 +123,11 @@ class ConjugateGradient : public OperatorFunction<Field> {
p = p * b + r;
LinalgTimer.Stop();
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
<< " residual " << cp << " target " << rsq << std::endl;
std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;
// Stopping condition
if (cp <= rsq) {
@@ -139,32 +135,33 @@ class ConjugateGradient : public OperatorFunction<Field> {
Linop.HermOpAndNorm(psi, mmp, d, qq);
p = mmp - src;
RealD mmpnorm = sqrt(norm2(mmp));
RealD psinorm = sqrt(norm2(psi));
RealD srcnorm = sqrt(norm2(src));
RealD resnorm = sqrt(norm2(p));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage
<< "ConjugateGradient: Converged on iteration " << k << std::endl;
std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual << " target "
<< Tolerance << std::endl;
std::cout << GridLogMessage << "Time elapsed: Iterations "
<< SolverTimer.Elapsed() << " Matrix "
<< MatrixTimer.Elapsed() << " Linalg "
<< LinalgTimer.Elapsed();
std::cout << std::endl;
std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl;
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
std::cout << GridLogMessage << "Time breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
<< std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
};
}
@@ -0,0 +1,256 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ConjugateGradientReliableUpdate.h
Copyright (C) 2015
Author: Christopher Kelly <ckelly@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H
#define GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H
namespace Grid {
template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
class ConjugateGradientReliableUpdate : public LinearFunction<FieldD> {
public:
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
// Defaults true.
RealD Tolerance;
Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
Integer ReliableUpdatesPerformed;
bool DoFinalCleanup; //Final DP cleanup, defaults to true
Integer IterationsToCleanup; //Final DP cleanup step iterations
LinearOperatorBase<FieldF> &Linop_f;
LinearOperatorBase<FieldD> &Linop_d;
GridBase* SinglePrecGrid;
RealD Delta; //reliable update parameter
//Optional ability to switch to a different linear operator once the tolerance reaches a certain point. Useful for single/half -> single/single
LinearOperatorBase<FieldF> *Linop_fallback;
RealD fallback_transition_tol;
ConjugateGradientReliableUpdate(RealD tol, Integer maxit, RealD _delta, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d, bool err_on_no_conv = true)
: Tolerance(tol),
MaxIterations(maxit),
Delta(_delta),
Linop_f(_Linop_f),
Linop_d(_Linop_d),
SinglePrecGrid(_sp_grid),
ErrorOnNoConverge(err_on_no_conv),
DoFinalCleanup(true),
Linop_fallback(NULL)
{};
void setFallbackLinop(LinearOperatorBase<FieldF> &_Linop_fallback, const RealD _fallback_transition_tol){
Linop_fallback = &_Linop_fallback;
fallback_transition_tol = _fallback_transition_tol;
}
void operator()(const FieldD &src, FieldD &psi) {
LinearOperatorBase<FieldF> *Linop_f_use = &Linop_f;
bool using_fallback = false;
psi.checkerboard = src.checkerboard;
conformable(psi, src);
RealD cp, c, a, d, b, ssq, qq, b_pred;
FieldD p(src);
FieldD mmp(src);
FieldD r(src);
// Initial residual computation & set up
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
Linop_d.HermOpAndNorm(psi, mmp, d, b);
r = src - mmp;
p = r;
a = norm2(p);
cp = a;
ssq = norm2(src);
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: guess " << guess << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: src " << ssq << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: mp " << d << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: mmp " << b << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: cp,r " << cp << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: p " << a << std::endl;
RealD rsq = Tolerance * Tolerance * ssq;
// Check if guess is really REALLY good :)
if (cp <= rsq) {
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate guess was REALLY good\n";
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
return;
}
//Single prec initialization
FieldF r_f(SinglePrecGrid);
r_f.checkerboard = r.checkerboard;
precisionChange(r_f, r);
FieldF psi_f(r_f);
psi_f = zero;
FieldF p_f(r_f);
FieldF mmp_f(r_f);
RealD MaxResidSinceLastRelUp = cp; //initial residual
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
GridStopWatch LinalgTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k = 0;
int l = 0;
for (k = 1; k <= MaxIterations; k++) {
c = cp;
MatrixTimer.Start();
Linop_f_use->HermOpAndNorm(p_f, mmp_f, d, qq);
MatrixTimer.Stop();
LinalgTimer.Start();
a = c / d;
b_pred = a * (a * qq - d) / c;
cp = axpy_norm(r_f, -a, mmp_f, r_f);
b = cp / c;
// Fuse these loops ; should be really easy
psi_f = a * p_f + psi_f;
//p_f = p_f * b + r_f;
LinalgTimer.Stop();
std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: Iteration " << k
<< " residual " << cp << " target " << rsq << std::endl;
std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;
if(cp > MaxResidSinceLastRelUp){
std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: updating MaxResidSinceLastRelUp : " << MaxResidSinceLastRelUp << " -> " << cp << std::endl;
MaxResidSinceLastRelUp = cp;
}
// Stopping condition
if (cp <= rsq) {
//Although not written in the paper, I assume that I have to add on the final solution
precisionChange(mmp, psi_f);
psi = psi + mmp;
SolverTimer.Stop();
Linop_d.HermOpAndNorm(psi, mmp, d, qq);
p = mmp - src;
RealD srcnorm = sqrt(norm2(src));
RealD resnorm = sqrt(norm2(p));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate Converged on iteration " << k << " after " << l << " reliable updates" << std::endl;
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
std::cout << GridLogMessage << "Time breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
ReliableUpdatesPerformed = l;
if(DoFinalCleanup){
//Do a final CG to cleanup
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate performing final cleanup.\n";
ConjugateGradient<FieldD> CG(Tolerance,MaxIterations);
CG.ErrorOnNoConverge = ErrorOnNoConverge;
CG(Linop_d,src,psi);
IterationsToCleanup = CG.IterationsToComplete;
}
else if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate complete.\n";
return;
}
else if(cp < Delta * MaxResidSinceLastRelUp) { //reliable update
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate "
<< cp << "(residual) < " << Delta << "(Delta) * " << MaxResidSinceLastRelUp << "(MaxResidSinceLastRelUp) on iteration " << k << " : performing reliable update\n";
precisionChange(mmp, psi_f);
psi = psi + mmp;
Linop_d.HermOpAndNorm(psi, mmp, d, qq);
r = src - mmp;
psi_f = zero;
precisionChange(r_f, r);
cp = norm2(r);
MaxResidSinceLastRelUp = cp;
b = cp/c;
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate new residual " << cp << std::endl;
l = l+1;
}
p_f = p_f * b + r_f; //update search vector after reliable update appears to help convergence
if(!using_fallback && Linop_fallback != NULL && cp < fallback_transition_tol){
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate switching to fallback linear operator on iteration " << k << " at residual " << cp << std::endl;
Linop_f_use = Linop_fallback;
using_fallback = true;
}
}
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate did NOT converge"
<< std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
ReliableUpdatesPerformed = l;
}
};
};
#endif
-137
View File
@@ -1,137 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/DenseMatrix.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_DENSE_MATRIX_H
#define GRID_DENSE_MATRIX_H
namespace Grid {
/////////////////////////////////////////////////////////////
// Matrix untils
/////////////////////////////////////////////////////////////
template<class T> using DenseVector = std::vector<T>;
template<class T> using DenseMatrix = DenseVector<DenseVector<T> >;
template<class T> void Size(DenseVector<T> & vec, int &N)
{
N= vec.size();
}
template<class T> void Size(DenseMatrix<T> & mat, int &N,int &M)
{
N= mat.size();
M= mat[0].size();
}
template<class T> void SizeSquare(DenseMatrix<T> & mat, int &N)
{
int M; Size(mat,N,M);
assert(N==M);
}
template<class T> void Resize(DenseVector<T > & mat, int N) {
mat.resize(N);
}
template<class T> void Resize(DenseMatrix<T > & mat, int N, int M) {
mat.resize(N);
for(int i=0;i<N;i++){
mat[i].resize(M);
}
}
template<class T> void Fill(DenseMatrix<T> & mat, T&val) {
int N,M;
Size(mat,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
mat[i][j] = val;
}}
}
/** Transpose of a matrix **/
template<class T> DenseMatrix<T> Transpose(DenseMatrix<T> & mat){
int N,M;
Size(mat,N,M);
DenseMatrix<T> C; Resize(C,M,N);
for(int i=0;i<M;i++){
for(int j=0;j<N;j++){
C[i][j] = mat[j][i];
}}
return C;
}
/** Set DenseMatrix to unit matrix **/
template<class T> void Unity(DenseMatrix<T> &A){
int N; SizeSquare(A,N);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
if ( i==j ) A[i][j] = 1;
else A[i][j] = 0;
}
}
}
/** Add C * I to matrix **/
template<class T>
void PlusUnit(DenseMatrix<T> & A,T c){
int dim; SizeSquare(A,dim);
for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}
}
/** return the Hermitian conjugate of matrix **/
template<class T>
DenseMatrix<T> HermitianConj(DenseMatrix<T> &mat){
int dim; SizeSquare(mat,dim);
DenseMatrix<T> C; Resize(C,dim,dim);
for(int i=0;i<dim;i++){
for(int j=0;j<dim;j++){
C[i][j] = conj(mat[j][i]);
}
}
return C;
}
/**Get a square submatrix**/
template <class T>
DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st, int col_end)
{
DenseMatrix<T> H; Resize(H,row_end - row_st,col_end-col_st);
for(int i = row_st; i<row_end; i++){
for(int j = col_st; j<col_end; j++){
H[i-row_st][j-col_st]=A[i][j];
}}
return H;
}
}
#include "Householder.h"
#include "Francis.h"
#endif
-81
View File
@@ -1,81 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/EigenSort.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_EIGENSORT_H
#define GRID_EIGENSORT_H
namespace Grid {
/////////////////////////////////////////////////////////////
// Eigen sorter to begin with
/////////////////////////////////////////////////////////////
template<class Field>
class SortEigen {
private:
//hacking for testing for now
private:
static bool less_lmd(RealD left,RealD right){
return left > right;
}
static bool less_pair(std::pair<RealD,Field const*>& left,
std::pair<RealD,Field const*>& right){
return left.first > (right.first);
}
public:
void push(DenseVector<RealD>& lmd,
DenseVector<Field>& evec,int N) {
DenseVector<Field> cpy(lmd.size(),evec[0]._grid);
for(int i=0;i<lmd.size();i++) cpy[i] = evec[i];
DenseVector<std::pair<RealD, Field const*> > emod(lmd.size());
for(int i=0;i<lmd.size();++i)
emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]);
partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair);
typename DenseVector<std::pair<RealD, Field const*> >::iterator it = emod.begin();
for(int i=0;i<N;++i){
lmd[i]=it->first;
evec[i]=*(it->second);
++it;
}
}
void push(DenseVector<RealD>& lmd,int N) {
std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd);
}
bool saturated(RealD lmd, RealD thrs) {
return fabs(lmd) > fabs(thrs);
}
};
}
#endif
-525
View File
@@ -1,525 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/Francis.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef FRANCIS_H
#define FRANCIS_H
#include <cstdlib>
#include <string>
#include <cmath>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <fstream>
#include <complex>
#include <algorithm>
//#include <timer.h>
//#include <lapacke.h>
//#include <Eigen/Dense>
namespace Grid {
template <class T> int SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small);
template <class T> int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small);
/**
Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm.
H =
x x x x x x x x x
x x x x x x x x x
0 x x x x x x x x
0 0 x x x x x x x
0 0 0 x x x x x x
0 0 0 0 x x x x x
0 0 0 0 0 x x x x
0 0 0 0 0 0 x x x
0 0 0 0 0 0 0 x x
Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary.
**/
template <class T>
int QReigensystem(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small)
{
DenseMatrix<T> H = Hin;
int N ; SizeSquare(H,N);
int M = N;
Fill(evals,0);
Fill(evecs,0);
T s,t,x=0,y=0,z=0;
T u,d;
T apd,amd,bc;
DenseVector<T> p(N,0);
T nrm = Norm(H); ///DenseMatrix Norm
int n, m;
int e = 0;
int it = 0;
int tot_it = 0;
int l = 0;
int r = 0;
DenseMatrix<T> P; Resize(P,N,N); Unity(P);
DenseVector<int> trows(N,0);
/// Check if the matrix is really hessenberg, if not abort
RealD sth = 0;
for(int j=0;j<N;j++){
for(int i=j+2;i<N;i++){
sth = abs(H[i][j]);
if(sth > small){
std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl;
exit(1);
}
}
}
do{
std::cout << "Francis QR Step N = " << N << std::endl;
/** Check for convergence
x x x x x
0 x x x x
0 0 x x x
0 0 x x x
0 0 0 0 x
for this matrix l = 4
**/
do{
l = Chop_subdiag(H,nrm,e,small);
r = 0; ///May have converged on more than one eval
///Single eval
if(l == N-1){
evals[e] = H[l][l];
N--; e++; r++; it = 0;
}
///RealD eval
if(l == N-2){
trows[l+1] = 1; ///Needed for UTSolve
apd = H[l][l] + H[l+1][l+1];
amd = H[l][l] - H[l+1][l+1];
bc = (T)4.0*H[l+1][l]*H[l][l+1];
evals[e] = (T)0.5*( apd + sqrt(amd*amd + bc) );
evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) );
N-=2; e+=2; r++; it = 0;
}
} while(r>0);
if(N ==0) break;
DenseVector<T > ck; Resize(ck,3);
DenseVector<T> v; Resize(v,3);
for(int m = N-3; m >= l; m--){
///Starting vector essentially random shift.
if(it%10 == 0 && N >= 3 && it > 0){
s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) );
t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) );
x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t;
y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s);
z = H[m+1][m]*H[m+2][m+1];
}
///Starting vector implicit Q theorem
else{
s = (H[N-2][N-2] + H[N-1][N-1]);
t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]);
x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t;
y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s);
z = H[m+1][m]*H[m+2][m+1];
}
ck[0] = x; ck[1] = y; ck[2] = z;
if(m == l) break;
/** Some stupid thing from numerical recipies, seems to work**/
// PAB.. for heaven's sake quote page, purpose, evidence it works.
// what sort of comment is that!?!?!?
u=abs(H[m][m-1])*(abs(y)+abs(z));
d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1]));
if ((T)abs(u+d) == (T)abs(d) ){
l = m; break;
}
//if (u < small){l = m; break;}
}
if(it > 100000){
std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl;
std::cout << "got " << e << " evals " << l << " " << N << std::endl;
exit(1);
}
normalize(ck); ///Normalization cancels in PHP anyway
T beta;
Householder_vector<T >(ck, 0, 2, v, beta);
Householder_mult<T >(H,v,beta,0,l,l+2,0);
Householder_mult<T >(H,v,beta,0,l,l+2,1);
///Accumulate eigenvector
Householder_mult<T >(P,v,beta,0,l,l+2,1);
int sw = 0; ///Are we on the last row?
for(int k=l;k<N-2;k++){
x = H[k+1][k];
y = H[k+2][k];
z = (T)0.0;
if(k+3 <= N-1){
z = H[k+3][k];
} else{
sw = 1;
v[2] = (T)0.0;
}
ck[0] = x; ck[1] = y; ck[2] = z;
normalize(ck);
Householder_vector<T >(ck, 0, 2-sw, v, beta);
Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,0);
Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,1);
///Accumulate eigenvector
Householder_mult<T >(P,v, beta,0,k+1,k+3-sw,1);
}
it++;
tot_it++;
}while(N > 1);
N = evals.size();
///Annoying - UT solves in reverse order;
DenseVector<T> tmp; Resize(tmp,N);
for(int i=0;i<N;i++){
tmp[i] = evals[N-i-1];
}
evals = tmp;
UTeigenvectors(H, trows, evals, evecs);
for(int i=0;i<evals.size();i++){evecs[i] = P*evecs[i]; normalize(evecs[i]);}
return tot_it;
}
template <class T>
int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small)
{
/**
Find the eigenvalues of an upper Hessenberg matrix using the Wilkinson QR algorithm.
H =
x x 0 0 0 0
x x x 0 0 0
0 x x x 0 0
0 0 x x x 0
0 0 0 x x x
0 0 0 0 x x
Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary. **/
return my_Wilkinson(Hin, evals, evecs, small, small);
}
template <class T>
int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small, RealD tol)
{
int N; SizeSquare(Hin,N);
int M = N;
///I don't want to modify the input but matricies must be passed by reference
//Scale a matrix by its "norm"
//RealD Hnorm = abs( Hin.LargestDiag() ); H = H*(1.0/Hnorm);
DenseMatrix<T> H; H = Hin;
RealD Hnorm = abs(Norm(Hin));
H = H * (1.0 / Hnorm);
// TODO use openmp and memset
Fill(evals,0);
Fill(evecs,0);
T s, t, x = 0, y = 0, z = 0;
T u, d;
T apd, amd, bc;
DenseVector<T> p; Resize(p,N); Fill(p,0);
T nrm = Norm(H); ///DenseMatrix Norm
int n, m;
int e = 0;
int it = 0;
int tot_it = 0;
int l = 0;
int r = 0;
DenseMatrix<T> P; Resize(P,N,N);
Unity(P);
DenseVector<int> trows(N, 0);
/// Check if the matrix is really symm tridiag
RealD sth = 0;
for(int j = 0; j < N; ++j)
{
for(int i = j + 2; i < N; ++i)
{
if(abs(H[i][j]) > tol || abs(H[j][i]) > tol)
{
std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl;
std::cout << "Warning tridiagonalize and call again" << std::endl;
// exit(1); // see what is going on
//return;
}
}
}
do{
do{
//Jasper
//Check if the subdiagonal term is small enough (<small)
//if true then it is converged.
//check start from H.dim - e - 1
//How to deal with more than 2 are converged?
//What if Chop_symm_subdiag return something int the middle?
//--------------
l = Chop_symm_subdiag(H,nrm, e, small);
r = 0; ///May have converged on more than one eval
//Jasper
//In this case
// x x 0 0 0 0
// x x x 0 0 0
// 0 x x x 0 0
// 0 0 x x x 0
// 0 0 0 x x 0
// 0 0 0 0 0 x <- l
//--------------
///Single eval
if(l == N - 1)
{
evals[e] = H[l][l];
N--;
e++;
r++;
it = 0;
}
//Jasper
// x x 0 0 0 0
// x x x 0 0 0
// 0 x x x 0 0
// 0 0 x x 0 0
// 0 0 0 0 x x <- l
// 0 0 0 0 x x
//--------------
///RealD eval
if(l == N - 2)
{
trows[l + 1] = 1; ///Needed for UTSolve
apd = H[l][l] + H[l + 1][ l + 1];
amd = H[l][l] - H[l + 1][l + 1];
bc = (T) 4.0 * H[l + 1][l] * H[l][l + 1];
evals[e] = (T) 0.5 * (apd + sqrt(amd * amd + bc));
evals[e + 1] = (T) 0.5 * (apd - sqrt(amd * amd + bc));
N -= 2;
e += 2;
r++;
it = 0;
}
}while(r > 0);
//Jasper
//Already converged
//--------------
if(N == 0) break;
DenseVector<T> ck,v; Resize(ck,2); Resize(v,2);
for(int m = N - 3; m >= l; m--)
{
///Starting vector essentially random shift.
if(it%10 == 0 && N >= 3 && it > 0)
{
t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]);
x = H[m][m] - t;
z = H[m + 1][m];
} else {
///Starting vector implicit Q theorem
d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5;
t = H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2]
/ (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2]));
x = H[m][m] - t;
z = H[m + 1][m];
}
//Jasper
//why it is here????
//-----------------------
if(m == l)
break;
u = abs(H[m][m - 1]) * (abs(y) + abs(z));
d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1]));
if ((T)abs(u + d) == (T)abs(d))
{
l = m;
break;
}
}
//Jasper
if(it > 1000000)
{
std::cout << "Wilkinson: bugger it got stuck after 100000 iterations" << std::endl;
std::cout << "got " << e << " evals " << l << " " << N << std::endl;
exit(1);
}
//
T s, c;
Givens_calc<T>(x, z, c, s);
Givens_mult<T>(H, l, l + 1, c, -s, 0);
Givens_mult<T>(H, l, l + 1, c, s, 1);
Givens_mult<T>(P, l, l + 1, c, s, 1);
//
for(int k = l; k < N - 2; ++k)
{
x = H.A[k + 1][k];
z = H.A[k + 2][k];
Givens_calc<T>(x, z, c, s);
Givens_mult<T>(H, k + 1, k + 2, c, -s, 0);
Givens_mult<T>(H, k + 1, k + 2, c, s, 1);
Givens_mult<T>(P, k + 1, k + 2, c, s, 1);
}
it++;
tot_it++;
}while(N > 1);
N = evals.size();
///Annoying - UT solves in reverse order;
DenseVector<T> tmp(N);
for(int i = 0; i < N; ++i)
tmp[i] = evals[N-i-1];
evals = tmp;
//
UTeigenvectors(H, trows, evals, evecs);
//UTSymmEigenvectors(H, trows, evals, evecs);
for(int i = 0; i < evals.size(); ++i)
{
evecs[i] = P * evecs[i];
normalize(evecs[i]);
evals[i] = evals[i] * Hnorm;
}
// // FIXME this is to test
// Hin.write("evecs3", evecs);
// Hin.write("evals3", evals);
// // check rsd
// for(int i = 0; i < M; i++) {
// vector<T> Aevec = Hin * evecs[i];
// RealD norm2(0.);
// for(int j = 0; j < M; j++) {
// norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]);
// }
// }
return tot_it;
}
template <class T>
void Hess(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){
/**
turn a matrix A =
x x x x x
x x x x x
x x x x x
x x x x x
x x x x x
into
x x x x x
x x x x x
0 x x x x
0 0 x x x
0 0 0 x x
with householder rotations
Slow.
*/
int N ; SizeSquare(A,N);
DenseVector<T > p; Resize(p,N); Fill(p,0);
for(int k=start;k<N-2;k++){
//cerr << "hess" << k << std::endl;
DenseVector<T > ck,v; Resize(ck,N-k-1); Resize(v,N-k-1);
for(int i=k+1;i<N;i++){ck[i-k-1] = A(i,k);} ///kth column
normalize(ck); ///Normalization cancels in PHP anyway
T beta;
Householder_vector<T >(ck, 0, ck.size()-1, v, beta); ///Householder vector
Householder_mult<T>(A,v,beta,start,k+1,N-1,0); ///A -> PA
Householder_mult<T >(A,v,beta,start,k+1,N-1,1); ///PA -> PAP^H
///Accumulate eigenvector
Householder_mult<T >(Q,v,beta,start,k+1,N-1,1); ///Q -> QP^H
}
/*for(int l=0;l<N-2;l++){
for(int k=l+2;k<N;k++){
A(0,k,l);
}
}*/
}
template <class T>
void Tri(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){
///Tridiagonalize a matrix
int N; SizeSquare(A,N);
Hess(A,Q,start);
/*for(int l=0;l<N-2;l++){
for(int k=l+2;k<N;k++){
A(0,l,k);
}
}*/
}
template <class T>
void ForceTridiagonal(DenseMatrix<T> &A){
///Tridiagonalize a matrix
int N ; SizeSquare(A,N);
for(int l=0;l<N-2;l++){
for(int k=l+2;k<N;k++){
A[l][k]=0;
A[k][l]=0;
}
}
}
template <class T>
int my_SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
///Solve a symmetric eigensystem, not necessarily in tridiagonal form
int N; SizeSquare(Ain,N);
DenseMatrix<T > A; A = Ain;
DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q);
Tri(A,Q,0);
int it = my_Wilkinson<T>(A, evals, evecs, small);
for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];}
return it;
}
template <class T>
int Wilkinson(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
return my_Wilkinson(Ain, evals, evecs, small);
}
template <class T>
int SymmEigensystem(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
return my_SymmEigensystem(Ain, evals, evecs, small);
}
template <class T>
int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
///Solve a general eigensystem, not necessarily in tridiagonal form
int N = Ain.dim;
DenseMatrix<T > A(N); A = Ain;
DenseMatrix<T > Q(N);Q.Unity();
Hess(A,Q,0);
int it = QReigensystem<T>(A, evals, evecs, small);
for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];}
return it;
}
}
#endif
-242
View File
@@ -1,242 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/Householder.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef HOUSEHOLDER_H
#define HOUSEHOLDER_H
#define TIMER(A) std::cout << GridLogMessage << __FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#define ENTER() std::cout << GridLogMessage << "ENTRY "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#define LEAVE() std::cout << GridLogMessage << "EXIT "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#include <cstdlib>
#include <string>
#include <cmath>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <fstream>
#include <complex>
#include <algorithm>
namespace Grid {
/** Comparison function for finding the max element in a vector **/
template <class T> bool cf(T i, T j) {
return abs(i) < abs(j);
}
/**
Calculate a real Givens angle
**/
template <class T> inline void Givens_calc(T y, T z, T &c, T &s){
RealD mz = (RealD)abs(z);
if(mz==0.0){
c = 1; s = 0;
}
if(mz >= (RealD)abs(y)){
T t = -y/z;
s = (T)1.0 / sqrt ((T)1.0 + t * t);
c = s * t;
} else {
T t = -z/y;
c = (T)1.0 / sqrt ((T)1.0 + t * t);
s = c * t;
}
}
template <class T> inline void Givens_mult(DenseMatrix<T> &A, int i, int k, T c, T s, int dir)
{
int q ; SizeSquare(A,q);
if(dir == 0){
for(int j=0;j<q;j++){
T nu = A[i][j];
T w = A[k][j];
A[i][j] = (c*nu + s*w);
A[k][j] = (-s*nu + c*w);
}
}
if(dir == 1){
for(int j=0;j<q;j++){
T nu = A[j][i];
T w = A[j][k];
A[j][i] = (c*nu - s*w);
A[j][k] = (s*nu + c*w);
}
}
}
/**
from input = x;
Compute the complex Householder vector, v, such that
P = (I - b v transpose(v) )
b = 2/v.v
P | x | | x | k = 0
| x | | 0 |
| x | = | 0 |
| x | | 0 | j = 3
| x | | x |
These are the "Unreduced" Householder vectors.
**/
template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, DenseVector<T> &v, T &beta)
{
int N ; Size(input,N);
T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T> );
if(abs(m) > 0.0){
T alpha = 0;
for(int i=k; i<j+1; i++){
v[i] = input[i]/m;
alpha = alpha + v[i]*conj(v[i]);
}
alpha = sqrt(alpha);
beta = (T)1.0/(alpha*(alpha + abs(v[k]) ));
if(abs(v[k]) > 0.0) v[k] = v[k] + (v[k]/abs(v[k]))*alpha;
else v[k] = -alpha;
} else{
for(int i=k; i<j+1; i++){
v[i] = 0.0;
}
}
}
/**
from input = x;
Compute the complex Householder vector, v, such that
P = (I - b v transpose(v) )
b = 2/v.v
Px = alpha*e_dir
These are the "Unreduced" Householder vectors.
**/
template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, int dir, DenseVector<T> &v, T &beta)
{
int N = input.size();
T m = *max_element(input.begin() + k, input.begin() + j + 1, cf);
if(abs(m) > 0.0){
T alpha = 0;
for(int i=k; i<j+1; i++){
v[i] = input[i]/m;
alpha = alpha + v[i]*conj(v[i]);
}
alpha = sqrt(alpha);
beta = 1.0/(alpha*(alpha + abs(v[dir]) ));
if(abs(v[dir]) > 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha;
else v[dir] = -alpha;
}else{
for(int i=k; i<j+1; i++){
v[i] = 0.0;
}
}
}
/**
Compute the product PA if trans = 0
AP if trans = 1
P = (I - b v transpose(v) )
b = 2/v.v
start at element l of matrix A
v is of length j - k + 1 of v are nonzero
**/
template <class T> inline void Householder_mult(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int k, int j, int trans)
{
int N ; SizeSquare(A,N);
if(abs(beta) > 0.0){
for(int p=l; p<N; p++){
T s = 0;
if(trans==0){
for(int i=k;i<j+1;i++) s += conj(v[i-k])*A[i][p];
s *= beta;
for(int i=k;i<j+1;i++){ A[i][p] = A[i][p]-s*conj(v[i-k]);}
} else {
for(int i=k;i<j+1;i++){ s += conj(v[i-k])*A[p][i];}
s *= beta;
for(int i=k;i<j+1;i++){ A[p][i]=A[p][i]-s*conj(v[i-k]);}
}
}
}
}
/**
Compute the product PA if trans = 0
AP if trans = 1
P = (I - b v transpose(v) )
b = 2/v.v
start at element l of matrix A
v is of length j - k + 1 of v are nonzero
A is tridiagonal
**/
template <class T> inline void Householder_mult_tri(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int M, int k, int j, int trans)
{
if(abs(beta) > 0.0){
int N ; SizeSquare(A,N);
DenseMatrix<T> tmp; Resize(tmp,N,N); Fill(tmp,0);
T s;
for(int p=l; p<M; p++){
s = 0;
if(trans==0){
for(int i=k;i<j+1;i++) s = s + conj(v[i-k])*A[i][p];
}else{
for(int i=k;i<j+1;i++) s = s + v[i-k]*A[p][i];
}
s = beta*s;
if(trans==0){
for(int i=k;i<j+1;i++) tmp[i][p] = tmp(i,p) - s*v[i-k];
}else{
for(int i=k;i<j+1;i++) tmp[p][i] = tmp[p][i] - s*conj(v[i-k]);
}
}
for(int p=l; p<M; p++){
if(trans==0){
for(int i=k;i<j+1;i++) A[i][p] = A[i][p] + tmp[i][p];
}else{
for(int i=k;i<j+1;i++) A[p][i] = A[p][i] + tmp[p][i];
}
}
}
}
}
#endif
File diff suppressed because it is too large Load Diff
-453
View File
@@ -1,453 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/Matrix.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef MATRIX_H
#define MATRIX_H
#include <cstdlib>
#include <string>
#include <cmath>
#include <vector>
#include <iostream>
#include <iomanip>
#include <complex>
#include <typeinfo>
#include <Grid/Grid.h>
/** Sign function **/
template <class T> T sign(T p){return ( p/abs(p) );}
/////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////// Hijack STL containers for our wicked means /////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////
template<class T> using Vector = Vector<T>;
template<class T> using Matrix = Vector<Vector<T> >;
template<class T> void Resize(Vector<T > & vec, int N) { vec.resize(N); }
template<class T> void Resize(Matrix<T > & mat, int N, int M) {
mat.resize(N);
for(int i=0;i<N;i++){
mat[i].resize(M);
}
}
template<class T> void Size(Vector<T> & vec, int &N)
{
N= vec.size();
}
template<class T> void Size(Matrix<T> & mat, int &N,int &M)
{
N= mat.size();
M= mat[0].size();
}
template<class T> void SizeSquare(Matrix<T> & mat, int &N)
{
int M; Size(mat,N,M);
assert(N==M);
}
template<class T> void SizeSame(Matrix<T> & mat1,Matrix<T> &mat2, int &N1,int &M1)
{
int N2,M2;
Size(mat1,N1,M1);
Size(mat2,N2,M2);
assert(N1==N2);
assert(M1==M2);
}
//*****************************************
//* (Complex) Vector operations *
//*****************************************
/**Conj of a Vector **/
template <class T> Vector<T> conj(Vector<T> p){
Vector<T> q(p.size());
for(int i=0;i<p.size();i++){q[i] = conj(p[i]);}
return q;
}
/** Norm of a Vector**/
template <class T> T norm(Vector<T> p){
T sum = 0;
for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);}
return abs(sqrt(sum));
}
/** Norm squared of a Vector **/
template <class T> T norm2(Vector<T> p){
T sum = 0;
for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);}
return abs((sum));
}
/** Sum elements of a Vector **/
template <class T> T trace(Vector<T> p){
T sum = 0;
for(int i=0;i<p.size();i++){sum = sum + p[i];}
return sum;
}
/** Fill a Vector with constant c **/
template <class T> void Fill(Vector<T> &p, T c){
for(int i=0;i<p.size();i++){p[i] = c;}
}
/** Normalize a Vector **/
template <class T> void normalize(Vector<T> &p){
T m = norm(p);
if( abs(m) > 0.0) for(int i=0;i<p.size();i++){p[i] /= m;}
}
/** Vector by scalar **/
template <class T, class U> Vector<T> times(Vector<T> p, U s){
for(int i=0;i<p.size();i++){p[i] *= s;}
return p;
}
template <class T, class U> Vector<T> times(U s, Vector<T> p){
for(int i=0;i<p.size();i++){p[i] *= s;}
return p;
}
/** inner product of a and b = conj(a) . b **/
template <class T> T inner(Vector<T> a, Vector<T> b){
T m = 0.;
for(int i=0;i<a.size();i++){m = m + conj(a[i])*b[i];}
return m;
}
/** sum of a and b = a + b **/
template <class T> Vector<T> add(Vector<T> a, Vector<T> b){
Vector<T> m(a.size());
for(int i=0;i<a.size();i++){m[i] = a[i] + b[i];}
return m;
}
/** sum of a and b = a - b **/
template <class T> Vector<T> sub(Vector<T> a, Vector<T> b){
Vector<T> m(a.size());
for(int i=0;i<a.size();i++){m[i] = a[i] - b[i];}
return m;
}
/**
*********************************
* Matrices *
*********************************
**/
template<class T> void Fill(Matrix<T> & mat, T&val) {
int N,M;
Size(mat,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
mat[i][j] = val;
}}
}
/** Transpose of a matrix **/
Matrix<T> Transpose(Matrix<T> & mat){
int N,M;
Size(mat,N,M);
Matrix C; Resize(C,M,N);
for(int i=0;i<M;i++){
for(int j=0;j<N;j++){
C[i][j] = mat[j][i];
}}
return C;
}
/** Set Matrix to unit matrix **/
template<class T> void Unity(Matrix<T> &mat){
int N; SizeSquare(mat,N);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
if ( i==j ) A[i][j] = 1;
else A[i][j] = 0;
}
}
}
/** Add C * I to matrix **/
template<class T>
void PlusUnit(Matrix<T> & A,T c){
int dim; SizeSquare(A,dim);
for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}
}
/** return the Hermitian conjugate of matrix **/
Matrix<T> HermitianConj(Matrix<T> &mat){
int dim; SizeSquare(mat,dim);
Matrix<T> C; Resize(C,dim,dim);
for(int i=0;i<dim;i++){
for(int j=0;j<dim;j++){
C[i][j] = conj(mat[j][i]);
}
}
return C;
}
/** return diagonal entries as a Vector **/
Vector<T> diag(Matrix<T> &A)
{
int dim; SizeSquare(A,dim);
Vector<T> d; Resize(d,dim);
for(int i=0;i<dim;i++){
d[i] = A[i][i];
}
return d;
}
/** Left multiply by a Vector **/
Vector<T> operator *(Vector<T> &B,Matrix<T> &A)
{
int K,M,N;
Size(B,K);
Size(A,M,N);
assert(K==M);
Vector<T> C; Resize(C,N);
for(int j=0;j<N;j++){
T sum = 0.0;
for(int i=0;i<M;i++){
sum += B[i] * A[i][j];
}
C[j] = sum;
}
return C;
}
/** return 1/diagonal entries as a Vector **/
Vector<T> inv_diag(Matrix<T> & A){
int dim; SizeSquare(A,dim);
Vector<T> d; Resize(d,dim);
for(int i=0;i<dim;i++){
d[i] = 1.0/A[i][i];
}
return d;
}
/** Matrix Addition **/
inline Matrix<T> operator + (Matrix<T> &A,Matrix<T> &B)
{
int N,M ; SizeSame(A,B,N,M);
Matrix C; Resize(C,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
C[i][j] = A[i][j] + B[i][j];
}
}
return C;
}
/** Matrix Subtraction **/
inline Matrix<T> operator- (Matrix<T> & A,Matrix<T> &B){
int N,M ; SizeSame(A,B,N,M);
Matrix C; Resize(C,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
C[i][j] = A[i][j] - B[i][j];
}}
return C;
}
/** Matrix scalar multiplication **/
inline Matrix<T> operator* (Matrix<T> & A,T c){
int N,M; Size(A,N,M);
Matrix C; Resize(C,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
C[i][j] = A[i][j]*c;
}}
return C;
}
/** Matrix Matrix multiplication **/
inline Matrix<T> operator* (Matrix<T> &A,Matrix<T> &B){
int K,L,N,M;
Size(A,K,L);
Size(B,N,M); assert(L==N);
Matrix C; Resize(C,K,M);
for(int i=0;i<K;i++){
for(int j=0;j<M;j++){
T sum = 0.0;
for(int k=0;k<N;k++) sum += A[i][k]*B[k][j];
C[i][j] =sum;
}
}
return C;
}
/** Matrix Vector multiplication **/
inline Vector<T> operator* (Matrix<T> &A,Vector<T> &B){
int M,N,K;
Size(A,N,M);
Size(B,K); assert(K==M);
Vector<T> C; Resize(C,N);
for(int i=0;i<N;i++){
T sum = 0.0;
for(int j=0;j<M;j++) sum += A[i][j]*B[j];
C[i] = sum;
}
return C;
}
/** Some version of Matrix norm **/
/*
inline T Norm(){ // this is not a usual L2 norm
T norm = 0;
for(int i=0;i<dim;i++){
for(int j=0;j<dim;j++){
norm += abs(A[i][j]);
}}
return norm;
}
*/
/** Some version of Matrix norm **/
template<class T> T LargestDiag(Matrix<T> &A)
{
int dim ; SizeSquare(A,dim);
T ld = abs(A[0][0]);
for(int i=1;i<dim;i++){
T cf = abs(A[i][i]);
if(abs(cf) > abs(ld) ){ld = cf;}
}
return ld;
}
/** Look for entries on the leading subdiagonal that are smaller than 'small' **/
template <class T,class U> int Chop_subdiag(Matrix<T> &A,T norm, int offset, U small)
{
int dim; SizeSquare(A,dim);
for(int l = dim - 1 - offset; l >= 1; l--) {
if((U)abs(A[l][l - 1]) < (U)small) {
A[l][l-1]=(U)0.0;
return l;
}
}
return 0;
}
/** Look for entries on the leading subdiagonal that are smaller than 'small' **/
template <class T,class U> int Chop_symm_subdiag(Matrix<T> & A,T norm, int offset, U small)
{
int dim; SizeSquare(A,dim);
for(int l = dim - 1 - offset; l >= 1; l--) {
if((U)abs(A[l][l - 1]) < (U)small) {
A[l][l - 1] = (U)0.0;
A[l - 1][l] = (U)0.0;
return l;
}
}
return 0;
}
/**Assign a submatrix to a larger one**/
template<class T>
void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S)
{
for(int i = row_st; i<row_end; i++){
for(int j = col_st; j<col_end; j++){
A[i][j] = S[i - row_st][j - col_st];
}
}
}
/**Get a square submatrix**/
template <class T>
Matrix<T> GetSubMtx(Matrix<T> &A,int row_st, int row_end, int col_st, int col_end)
{
Matrix<T> H; Resize(row_end - row_st,col_end-col_st);
for(int i = row_st; i<row_end; i++){
for(int j = col_st; j<col_end; j++){
H[i-row_st][j-col_st]=A[i][j];
}}
return H;
}
/**Assign a submatrix to a larger one NB remember Vector Vectors are transposes of the matricies they represent**/
template<class T>
void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S)
{
for(int i = row_st; i<row_end; i++){
for(int j = col_st; j<col_end; j++){
A[i][j] = S[i - row_st][j - col_st];
}}
}
/** compute b_i A_ij b_j **/ // surprised no Conj
template<class T> T proj(Matrix<T> A, Vector<T> B){
int dim; SizeSquare(A,dim);
int dimB; Size(B,dimB);
assert(dimB==dim);
T C = 0;
for(int i=0;i<dim;i++){
T sum = 0.0;
for(int j=0;j<dim;j++){
sum += A[i][j]*B[j];
}
C += B[i]*sum; // No conj?
}
return C;
}
/*
*************************************************************
*
* Matrix Vector products
*
*************************************************************
*/
// Instead make a linop and call my CG;
/// q -> q Q
template <class T,class Fermion> void times(Vector<Fermion> &q, Matrix<T> &Q)
{
int M; SizeSquare(Q,M);
int N; Size(q,N);
assert(M==N);
times(q,Q,N);
}
/// q -> q Q
template <class T> void times(multi1d<LatticeFermion> &q, Matrix<T> &Q, int N)
{
GridBase *grid = q[0]._grid;
int M; SizeSquare(Q,M);
int K; Size(q,K);
assert(N<M);
assert(N<K);
Vector<Fermion> S(N,grid );
for(int j=0;j<N;j++){
S[j] = zero;
for(int k=0;k<N;k++){
S[j] = S[j] + q[k]* Q[k][j];
}
}
for(int j=0;j<q.size();j++){
q[j] = S[j];
}
}
#endif
+240
View File
@@ -63,6 +63,85 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
*/
namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form a Red Black solver calling a Herm solver
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackStaggeredSolve {
private:
OperatorFunction<Field> & _HermitianRBSolver;
int CBfactorise;
public:
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver) :
_HermitianRBSolver(HermitianRBSolver)
{
CBfactorise=0;
};
template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){
// FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
Field src_e(grid);
Field src_o(grid);
Field sol_e(grid);
Field sol_o(grid);
Field tmp(grid);
Field Mtmp(grid);
Field resid(fgrid);
pickCheckerboard(Even,src_e,in);
pickCheckerboard(Odd ,src_o,in);
pickCheckerboard(Even,sol_e,out);
pickCheckerboard(Odd ,sol_o,out);
/////////////////////////////////////////////////////
// src_o = Mdag * (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
_Matrix.Mooee(tmp,src_o); assert(src_o.checkerboard ==Odd);
//////////////////////////////////////////////////////////////
// Call the red-black solver
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
///////////////////////////////////////////////////
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
// Verify the unprec residual
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
}
};
template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form a Red Black solver calling a Herm solver
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
@@ -141,5 +220,166 @@ namespace Grid {
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form a Red Black solver calling a Herm solver
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackDiagTwoSolve {
private:
OperatorFunction<Field> & _HermitianRBSolver;
int CBfactorise;
public:
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver) :
_HermitianRBSolver(HermitianRBSolver)
{
CBfactorise=0;
};
template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){
// FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
Field src_e(grid);
Field src_o(grid);
Field sol_e(grid);
Field sol_o(grid);
Field tmp(grid);
Field Mtmp(grid);
Field resid(fgrid);
pickCheckerboard(Even,src_e,in);
pickCheckerboard(Odd ,src_o,in);
pickCheckerboard(Even,sol_e,out);
pickCheckerboard(Odd ,sol_o,out);
/////////////////////////////////////////////////////
// src_o = Mdag * (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
// get the right MpcDag
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
//////////////////////////////////////////////////////////////
// Call the red-black solver
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
// _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
_HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
_Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
///////////////////////////////////////////////////
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
// Verify the unprec residual
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form a Red Black solver calling a Herm solver
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SchurRedBlackDiagTwoMixed {
private:
LinearFunction<Field> & _HermitianRBSolver;
int CBfactorise;
public:
/////////////////////////////////////////////////////
// Wrap the usual normal equations Schur trick
/////////////////////////////////////////////////////
SchurRedBlackDiagTwoMixed(LinearFunction<Field> &HermitianRBSolver) :
_HermitianRBSolver(HermitianRBSolver)
{
CBfactorise=0;
};
template<class Matrix>
void operator() (Matrix & _Matrix,const Field &in, Field &out){
// FIXME CGdiagonalMee not implemented virtual function
// FIXME use CBfactorise to control schur decomp
GridBase *grid = _Matrix.RedBlackGrid();
GridBase *fgrid= _Matrix.Grid();
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
Field src_e(grid);
Field src_o(grid);
Field sol_e(grid);
Field sol_o(grid);
Field tmp(grid);
Field Mtmp(grid);
Field resid(fgrid);
pickCheckerboard(Even,src_e,in);
pickCheckerboard(Odd ,src_o,in);
pickCheckerboard(Even,sol_e,out);
pickCheckerboard(Odd ,sol_o,out);
/////////////////////////////////////////////////////
// src_o = Mdag * (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
// get the right MpcDag
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
//////////////////////////////////////////////////////////////
// Call the red-black solver
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
// _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
// _HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
_HermitianRBSolver(src_o,tmp); assert(tmp.checkerboard==Odd);
_Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
///////////////////////////////////////////////////
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
// Verify the unprec residual
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
}
};
}
#endif
-15
View File
@@ -1,15 +0,0 @@
- ConjugateGradientMultiShift
- MCR
- Potentially Useful Boost libraries
- MultiArray
- Aligned allocator; memory pool
- Remez -- Mike or Boost?
- Multiprecision
- quaternians
- Tokenize
- Serialization
- Regex
- Proto (ET)
- uBlas
-122
View File
@@ -1,122 +0,0 @@
#include <math.h>
#include <stdlib.h>
#include <vector>
struct Bisection {
static void get_eig2(int row_num,std::vector<RealD> &ALPHA,std::vector<RealD> &BETA, std::vector<RealD> & eig)
{
int i,j;
std::vector<RealD> evec1(row_num+3);
std::vector<RealD> evec2(row_num+3);
RealD eps2;
ALPHA[1]=0.;
BETHA[1]=0.;
for(i=0;i<row_num-1;i++) {
ALPHA[i+1] = A[i*(row_num+1)].real();
BETHA[i+2] = A[i*(row_num+1)+1].real();
}
ALPHA[row_num] = A[(row_num-1)*(row_num+1)].real();
bisec(ALPHA,BETHA,row_num,1,row_num,1e-10,1e-10,evec1,eps2);
bisec(ALPHA,BETHA,row_num,1,row_num,1e-16,1e-16,evec2,eps2);
// Do we really need to sort here?
int begin=1;
int end = row_num;
int swapped=1;
while(swapped) {
swapped=0;
for(i=begin;i<end;i++){
if(mag(evec2[i])>mag(evec2[i+1])) {
swap(evec2+i,evec2+i+1);
swapped=1;
}
}
end--;
for(i=end-1;i>=begin;i--){
if(mag(evec2[i])>mag(evec2[i+1])) {
swap(evec2+i,evec2+i+1);
swapped=1;
}
}
begin++;
}
for(i=0;i<row_num;i++){
for(j=0;j<row_num;j++) {
if(i==j) H[i*row_num+j]=evec2[i+1];
else H[i*row_num+j]=0.;
}
}
}
static void bisec(std::vector<RealD> &c,
std::vector<RealD> &b,
int n,
int m1,
int m2,
RealD eps1,
RealD relfeh,
std::vector<RealD> &x,
RealD &eps2)
{
std::vector<RealD> wu(n+2);
RealD h,q,x1,xu,x0,xmin,xmax;
int i,a,k;
b[1]=0.0;
xmin=c[n]-fabs(b[n]);
xmax=c[n]+fabs(b[n]);
for(i=1;i<n;i++){
h=fabs(b[i])+fabs(b[i+1]);
if(c[i]+h>xmax) xmax= c[i]+h;
if(c[i]-h<xmin) xmin= c[i]-h;
}
xmax *=2.;
eps2=relfeh*((xmin+xmax)>0.0 ? xmax : -xmin);
if(eps1<=0.0) eps1=eps2;
eps2=0.5*eps1+7.0*(eps2);
x0=xmax;
for(i=m1;i<=m2;i++){
x[i]=xmax;
wu[i]=xmin;
}
for(k=m2;k>=m1;k--){
xu=xmin;
i=k;
do{
if(xu<wu[i]){
xu=wu[i];
i=m1-1;
}
i--;
}while(i>=m1);
if(x0>x[k]) x0=x[k];
while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){
x1=(xu+x0)/2;
a=0;
q=1.0;
for(i=1;i<=n;i++){
q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q:fabs(b[i])/relfeh);
if(q<0) a++;
}
// printf("x1=%e a=%d\n",x1,a);
if(a<k){
if(a<m1){
xu=x1;
wu[m1]=x1;
}else {
xu=x1;
wu[a+1]=x1;
if(x[a]>x1) x[a]=x1;
}
}else x0=x1;
}
x[k]=(x0+xu)/2;
}
}
}
-1
View File
@@ -1 +0,0 @@
+97
View File
@@ -0,0 +1,97 @@
#include <Grid/GridCore.h>
#include <fcntl.h>
namespace Grid {
int PointerCache::victim;
PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
void *PointerCache::Insert(void *ptr,size_t bytes) {
if (bytes < 4096 ) return ptr;
#ifdef GRID_OMP
assert(omp_in_parallel()==0);
#endif
void * ret = NULL;
int v = -1;
for(int e=0;e<Ncache;e++) {
if ( Entries[e].valid==0 ) {
v=e;
break;
}
}
if ( v==-1 ) {
v=victim;
victim = (victim+1)%Ncache;
}
if ( Entries[v].valid ) {
ret = Entries[v].address;
Entries[v].valid = 0;
Entries[v].address = NULL;
Entries[v].bytes = 0;
}
Entries[v].address=ptr;
Entries[v].bytes =bytes;
Entries[v].valid =1;
return ret;
}
void *PointerCache::Lookup(size_t bytes) {
if (bytes < 4096 ) return NULL;
#ifdef _OPENMP
assert(omp_in_parallel()==0);
#endif
for(int e=0;e<Ncache;e++){
if ( Entries[e].valid && ( Entries[e].bytes == bytes ) ) {
Entries[e].valid = 0;
return Entries[e].address;
}
}
return NULL;
}
void check_huge_pages(void *Buf,uint64_t BYTES)
{
#ifdef __linux__
int fd = open("/proc/self/pagemap", O_RDONLY);
assert(fd >= 0);
const int page_size = 4096;
uint64_t virt_pfn = (uint64_t)Buf / page_size;
off_t offset = sizeof(uint64_t) * virt_pfn;
uint64_t npages = (BYTES + page_size-1) / page_size;
uint64_t pagedata[npages];
uint64_t ret = lseek(fd, offset, SEEK_SET);
assert(ret == offset);
ret = ::read(fd, pagedata, sizeof(uint64_t)*npages);
assert(ret == sizeof(uint64_t) * npages);
int nhugepages = npages / 512;
int n4ktotal, nnothuge;
n4ktotal = 0;
nnothuge = 0;
for (int i = 0; i < nhugepages; ++i) {
uint64_t baseaddr = (pagedata[i*512] & 0x7fffffffffffffULL) * page_size;
for (int j = 0; j < 512; ++j) {
uint64_t pageaddr = (pagedata[i*512+j] & 0x7fffffffffffffULL) * page_size;
++n4ktotal;
if (pageaddr != baseaddr + j * page_size)
++nnothuge;
}
}
int rank = CartesianCommunicator::RankWorld();
printf("rank %d Allocated %d 4k pages, %d not in huge pages\n", rank, n4ktotal, nnothuge);
#endif
}
}

Some files were not shown because too many files have changed in this diff Show More