1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-14 22:07:05 +01:00

Compare commits

..

133 Commits

Author SHA1 Message Date
8b91b61b61 First cut at faster GPU slice sum via atomics 2022-12-22 15:13:45 -05:00
4ca1bf7cca Added gauge invariance test 2022-12-21 07:23:16 -05:00
2ff868f7a5 CPU open doesn't need to free space 2022-12-20 05:10:23 -05:00
ede02b6883 Memory manager debug Felix case 2022-12-20 05:10:23 -05:00
1822ced302 Bug fix 2022-12-20 05:10:23 -05:00
37ba32776f More logging 2022-12-20 05:10:23 -05:00
99b3697b03 More loggin 2022-12-20 05:10:23 -05:00
43a45ec97b SSC_START 2022-12-20 05:10:23 -05:00
b00a4142e5 A=A fix 2022-12-20 05:10:23 -05:00
3791bc527b Logging pulled in from dirichlet branch 2022-12-20 05:10:23 -05:00
d8c29f5fcf Updated FFT test for PETSc 2022-12-18 12:05:00 -05:00
281f8101fe Matt FFT test 2022-12-17 20:35:33 -05:00
07acfe89f2 Merge pull request #417 from rrhodgson/feature/fermtoprop
Feature/fermtoprop
2022-12-06 12:45:03 -05:00
40234f531f FermToProp accelerator_for -> thread_for 2022-12-06 17:34:51 +00:00
d49694f38f PropToFerm fix 2022-12-06 15:48:54 +00:00
97a098636d FermToProp 2022-11-30 15:36:35 -05:00
e13930c8b2 Faster fermtoprop case 2022-11-30 15:11:29 -05:00
0655dab466 Open MP on host enabled 2022-11-08 13:38:54 -08:00
7f097bcc28 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2022-11-08 13:23:40 -08:00
5c75aa5008 Device mem 2022-11-08 13:22:57 -08:00
1873101362 PVC 2022-11-08 13:22:45 -08:00
63fd1dfa62 Config on PVC 2022-11-08 13:22:09 -08:00
bd68861b28 SYCL sum 2022-11-08 12:49:26 -08:00
82e959f66c SYCL reduction 2022-11-08 12:45:25 -08:00
62e52de06d Merge pull request #414 from fjosw/feat/eCloverGPU
Compact Exponential Cloverterm on GPU
2022-11-01 09:15:44 -04:00
184adeedb8 feat: renamed open_boundaries to fixedBoundaries 2022-10-26 12:53:46 +01:00
5fa6a8b96d docs: CompactClover debug info generalized. 2022-10-26 12:41:14 +01:00
a2a879b668 docs: CompactClover Debug Info improved. 2022-10-25 17:20:42 +01:00
9317d893b2 docs: details about inversion of CompactClover term added. 2022-10-25 17:10:06 +01:00
86075fdd45 feat: MassTerm and ExponentiateClover merged into InstantiateClover 2022-10-25 17:05:34 +01:00
b36442e263 feat: CloverHelpers::InvertClover implemented which handles the
inversion of the Clover term depending on clover type and the boundary
conditions.
2022-10-25 16:57:01 +01:00
513d797ea6 fix: signature of CompactWilsonCloverHelpers::Exponentiate fixed. 2022-10-25 16:17:22 +01:00
9e4835a3e3 feat: changed CompactWilsonExpClover exponentiation to Taylor expansion
with Horner scheme.
2022-10-25 15:19:43 +01:00
477ebf24f4 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2022-10-04 11:19:43 -07:00
0d5639f707 Run script update 2022-10-04 11:13:41 -07:00
413312f9a9 Benchmark the halo construction.
THe bye counts are out and should be doubled for SIMD directions
2022-10-04 11:12:59 -07:00
03508448f8 Remove verbose 2022-10-04 11:12:15 -07:00
e1e5c75023 Stencil gather improvements - SVM was running slow and used for a pointer array that wasn't needed to be in SVM 2022-10-04 11:11:10 -07:00
9296299b61 Better commenting 2022-10-04 11:10:34 -07:00
913fbca74a Merge pull request #410 from gkanwar/photon_and_sha_patches
Photon.h and SHA256 patches
2022-08-31 18:01:45 -04:00
60dfb49afa Remove FP16 tests when FP16 is disabled 2022-08-21 17:29:55 +02:00
554c238359 Update OpenSSL digest to use high-level methods
This avoids deprecation warnings when compiling against OpenSSL 3.0
but should still be backwards compatible. It is the recommended way
to use the digest API going forward.
2022-08-21 17:28:57 +02:00
f922adf05e Fix Photon ComplexField type 2022-08-21 16:16:18 +02:00
188d2c7a4d PVC default, ignore ATS 2022-08-02 08:38:53 -07:00
17d7177105 Files for SYCL 2022-08-02 08:33:39 -07:00
bb0a0da47a inon blocking caution due to SYCL 2022-08-02 08:09:43 -07:00
84110166e4 Fix the fence 2022-08-02 08:00:43 -07:00
d32b923b6c Fencing on a stream in SYCL is needed. Didn't know that ... gulp 2022-08-02 07:58:04 -07:00
2ab1af5754 Ensure no synchronize and not optoin dependent 2022-07-19 09:51:06 -07:00
5f8892bf03 Mistake pointed out by Camilo 2022-07-19 09:31:51 -07:00
f14e7e51e7 Grid accelerator 2022-07-12 10:56:22 -07:00
042ab1a052 Update GridStd.h 2022-06-27 13:21:39 -04:00
2df98a99bc Merge pull request #406 from giordano/patch-1
Update default value of gen-simd-width in README
2022-06-14 17:46:25 -04:00
315ea18be2 Update default value of gen-simd-width in README 2022-06-14 22:41:05 +01:00
a9c2e1df03 Merge pull request #404 from rrhodgson/feature/json_nvcc
Feature/json nvcc
2022-05-25 13:30:11 -04:00
da4daea57a Updated json to latest release 3.10.5 2022-05-24 16:16:06 +01:00
af3b065add Merge pull request #403 from fjosw/fix/cuda_11_5_warnings
Fixed nvcc 11.5+ warnings
2022-05-24 11:10:02 -04:00
e346154c5d Updated json CUDA compile guards 2022-05-24 15:48:01 +01:00
7937ac2bab fix: conditional pragmas according to new NVCC_DIAG_PRAGMA_SUPPORT standard in pugixml/pugixml.cc 2022-05-24 15:31:03 +01:00
e909aeedf0 fix: conditional pragmas according to new NVCC_DIAG_PRAGMA_SUPPORT standard in Grid_Eigen_Dense.h 2022-05-24 15:29:42 +01:00
bab8aa8eb0 fix: conditional pragmas according to new NVCC_DIAG_PRAGMA_SUPPORT
standard in DisableWarnings.h
2022-05-24 15:27:40 +01:00
38b22f05be Merge pull request #402 from fjosw/fix/clover_warnings
fixed clover warnings
2022-05-24 10:05:27 -04:00
3ca0de1c40 Fix json write for vector<string> 2022-05-24 14:37:33 +01:00
c7205d2a73 Removed nvcc guards for json 2022-05-24 14:30:26 +01:00
617c5362c1 fix: fixed warning: missing return statement at end of non-void function
in CloverHelpers
2022-05-24 11:37:33 +01:00
083b58e66d Merge pull request #401 from JPRichings/LocalCoheranceDeflation
Local coherance batch deflation
2022-05-20 11:44:22 -04:00
633427a2df Merge pull request #400 from JPRichings/wilson_sweep
bench wilson sweep fix
2022-05-20 11:43:40 -04:00
2031d6910a Merge branch 'paboyle:develop' into wilson_sweep 2022-05-20 16:20:23 +01:00
79e34b3eb4 Local Coherence batch deflation 2022-05-19 14:53:17 +01:00
4f3d581ab4 Merge branch 'paboyle:develop' into LocalCoheranceDeflation 2022-05-19 14:46:17 +01:00
d16427b837 Merge pull request #399 from fjosw/fix/Nc_neq_3
fix: assert for dimensions of compact Wilson clover moved to constructor
2022-05-17 09:03:42 -04:00
4b1997e2f3 wilson sweep test 2022-05-16 15:58:33 +01:00
8939d5dc73 bugfix: eo operator called in correct location 2022-05-16 00:28:28 +01:00
b051e00de0 Additional Local Coherance Deflation operator() 2022-05-16 00:25:13 +01:00
8aa75b492f Merge branch 'develop' into fix/Nc_neq_3 2022-05-10 14:22:03 +01:00
0274f40686 Merge pull request #389 from mbruno46/mbruno-eclover
Feature/expClover
2022-05-10 09:18:19 -04:00
77aa147ce5 Merge branch 'develop' into mbruno-eclover 2022-05-10 09:16:53 -04:00
32facbd02a fix: assert for dimensions of compact Wilson clover moved to
constructor.
2022-05-10 10:53:22 +01:00
4de50ab146 Merge pull request #396 from fjosw/fix/readd_config.h
fix: readded Config.h and Version.h to HFILEs in Grid/Makefile.am
2022-05-09 08:26:48 -04:00
8b12a61097 fix: readded Config.h and Version.h to HFILEs in Grid/Makefile.am 2022-05-09 11:53:22 +01:00
79ea027c0b Merge pull request #377 from RJHudspith/develop
NERSC and ILDG for non-SU(3) configuration checkpoints
2022-05-03 08:55:48 -04:00
62339d437f Merge pull request #387 from lehner/feature/gpt
Parity mass terms for domain wall fermions to enable 4d eofa
2022-05-03 08:52:18 -04:00
698e745276 Merge pull request #390 from fjosw/feature/conserved_current_wilson
Conserved current for wilson fermions
2022-05-03 08:51:10 -04:00
9a6e2c315d Merge pull request #394 from fjosw/fix/gauge_fix_ErrorOnNoConverge
SteepestDescentGaugeFix now exits when the algorithm does not converge.
2022-05-03 08:49:26 -04:00
e61fed87db SteepestDescentGaugeFix now exits when the algorithm does not converge.
This behaviour can be altered by setting err_on_no_converge to false.
2022-04-20 15:41:55 +01:00
b8bc560b51 Test_wilson_conserved_current implemented, all 5d references removed. 2022-04-05 17:33:45 +01:00
6bc2483d57 Merge branch 'feature/eclover' into feature/conserved_current_wilson 2022-04-05 15:26:49 +01:00
82aecbf4cf Test_wilson_conserved_current added 2022-04-05 15:26:39 +01:00
ee23a76aa0 Merge pull request #2 from fjosw/feature/eclover
Feature/eclover
2022-04-05 13:30:13 +02:00
d7191e5a02 SeqConservedCurrent implemented for Wilson fermions 2022-04-05 11:48:56 +01:00
c8a824425b Error message added if another conserved current than vector is requested for
Wilson type fermions.
2022-04-05 10:58:22 +01:00
f23626a6b8 End scope by additional block in CloverHelpers.h 2022-04-02 16:08:15 +01:00
6577a03d16 Explcitly closed views in Exponentiate_Clover 2022-04-01 18:39:12 +01:00
427c8695fe Change signs and prefactors for conserved current to mimic the 5d
version.
2022-04-01 16:20:21 +01:00
9e82c468ab Multiplication of diagonal mass in exponentiate fixed for gpus 2022-04-01 15:54:43 +01:00
603fd96747 Missing link multiplication added. 2022-04-01 10:58:56 +01:00
fe993c0836 /=2 replaced by *=0.5 2022-03-31 17:08:17 +01:00
cdf31d52c1 GaugeGrid and typo fixed 2022-03-31 17:04:35 +01:00
0542eaf1da First version of conserved current contraction for Wilson type quarks 2022-03-31 17:02:09 +01:00
317bdcf158 nerscio parametrization 2022-03-24 13:10:47 +01:00
9ca2c98882 Merge branch 'develop' of https://github.com/paboyle/Grid into mbruno-eclover 2022-03-22 15:31:37 +01:00
605cf401e1 Merge branch 'feature/sumd-npr' into develop 2022-03-16 22:43:12 +00:00
f99c3660d2 Merge branch 'feature/cpu-threaded-smp' into develop 2022-03-16 22:07:54 +00:00
92a83a9eb3 Performance improve for Tesseract 2022-03-16 17:14:36 +00:00
53ae01a34a Merge pull request #1 from fjosw/feature/eclover
Feature/eclover
2022-03-15 15:23:35 +01:00
b615fa0f35 Merge pull request #388 from fjosw/feature/sumd-npr
Feature/sumd npr
2022-03-15 09:05:57 -04:00
76c294a7ba open bc fix 2022-03-08 13:55:16 +01:00
0c0c2b1e20 Unnecessary arguments of CloverHelpers::Exponentiate_Clover removed. 2022-03-08 09:44:51 +00:00
e2fc3a0f04 Merge pull request #28 from paboyle/develop
Sync with Upstream
2022-03-08 09:58:51 +01:00
451e7972fd Reintroduced explicit inversion of the Clover term in case of the
CompactExpClover because of the open boundary O(a) improvement. Changed
the timing output to GridLogDebug
2022-03-07 17:43:33 +00:00
56c089d347 Removed leftover comments 2022-03-07 16:40:20 +00:00
acf740e44d Merge pull request #1 from FelixPGZiegler/feature/eclover
Feature/eclover
2022-03-07 16:25:11 +00:00
182f513404 Merge remote-tracking branch 'fjosw/feature/eclover' into feature/eclover 2022-03-07 15:22:04 +00:00
d5b2323a57 included Cayley-Hamilton exponentiation for the compact Wilson exp clover, bug fix for inverse of exp clover 2022-03-07 14:44:24 +00:00
bad18d4417 Merge branch 'paboyle:develop' into feature/eclover 2022-03-07 13:54:10 +00:00
d1decee4cc Cleaned up unused variables in Lattice_reduction_gpu.h 2022-03-02 16:54:23 +00:00
d4ae71b880 sum_gpu_large and sum_gpu templates added. 2022-03-02 15:40:18 +00:00
e16fc5b2e4 Threaded intranode comms transfer - ideally between NUMA domains 2022-03-01 11:17:24 -05:00
694306f202 Configure for mac arm 2022-03-01 10:53:44 -05:00
9aac1e6d64 Merge branch 'develop' into feature/sumd-npr 2022-03-01 10:51:38 -05:00
3e882f555d Large / small sumD options 2022-03-01 08:54:45 -05:00
438caab25f generate_instantiations.sh now correctly produces instantiations for CompactClover variant, redundant instantiations removed. 2022-02-27 18:27:18 +00:00
239e2c1ee6 tests: wilson clover cg tests now include compact variant as well as
exponential wilson clover operators
2022-02-27 18:26:34 +00:00
013dc2ef33 tests: core tests for wilson clover and wilson exp clover including
compact version extended/added
2022-02-27 18:13:47 +00:00
9616811c3d Merge branch 'feature/gpt' of https://github.com/lehner/Grid into feature/gpt 2022-02-24 22:03:05 +01:00
8a3002c03b separate left and right masses for CayleyFermion5D 2022-02-24 22:02:56 +01:00
71034f828e attempt to fix broken WilsonExpClover; Compact version still broken will be replaced by F.Joswig 2022-02-23 01:02:27 +01:00
11437930c5 cleaned up definitions of wilsonclover fermions 2022-02-22 10:45:16 +01:00
3d44aa9cb9 cleaned up cloverhelpers; fixed test compact_clover which runs 2022-02-22 01:10:19 +01:00
2851870d70 expClover support via helpers template class 2022-02-22 00:05:43 +01:00
0bd83cdbda Fixes for Nc!=3 Nersc IO, Gauge and Gauge_NCxNC compatible with GLU. Trace normalisation changed in places removing explicit threes. Guards against non-su3 tests and tests failing when LIME is not compiled. 2021-11-28 21:51:03 +01:00
42d56ea6b6 Verbosity 2021-10-29 02:23:08 +01:00
0b905a72dd Better reduction for GPUs 2021-10-29 02:22:22 +01:00
161 changed files with 17872 additions and 21612 deletions

View File

@ -34,9 +34,6 @@ directory
#if defined __GNUC__ && __GNUC__>=6
#pragma GCC diagnostic ignored "-Wignored-attributes"
#endif
#if defined __GNUC__
#pragma GCC diagnostic ignored "-Wpsabi"
#endif
//disables and intel compiler specific warning (in json.hpp)
@ -47,14 +44,22 @@ directory
#ifdef __NVCC__
//disables nvcc specific warning in json.hpp
#pragma clang diagnostic ignored "-Wdeprecated-register"
#if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 5)
//disables nvcc specific warning in json.hpp
#pragma nv_diag_suppress unsigned_compare_with_zero
#pragma nv_diag_suppress cast_to_qualified_type
//disables nvcc specific warning in many files
#pragma nv_diag_suppress esa_on_defaulted_function_ignored
#pragma nv_diag_suppress extra_semicolon
#else
//disables nvcc specific warning in json.hpp
#pragma diag_suppress unsigned_compare_with_zero
#pragma diag_suppress cast_to_qualified_type
//disables nvcc specific warning in many files
#pragma diag_suppress esa_on_defaulted_function_ignored
#pragma diag_suppress extra_semicolon
//Eigen only
#endif
#endif
// Disable vectorisation in Eigen on the Power8/9 and PowerPC

View File

@ -36,7 +36,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/GridCore.h>
#include <Grid/qcd/QCD.h>
#include <Grid/qcd/spin/Spin.h>
#include <Grid/qcd/gparity/Gparity.h>
#include <Grid/qcd/utils/Utils.h>
#include <Grid/qcd/representations/Representations.h>
NAMESPACE_CHECK(GridQCDCore);

View File

@ -16,6 +16,7 @@
#include <functional>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <stdio.h>
#include <signal.h>
#include <ctime>

View File

@ -14,7 +14,11 @@
/* NVCC save and restore compile environment*/
#ifdef __NVCC__
#pragma push
#if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 5)
#pragma nv_diag_suppress code_is_unreachable
#else
#pragma diag_suppress code_is_unreachable
#endif
#pragma push_macro("__CUDA_ARCH__")
#pragma push_macro("__NVCC__")
#pragma push_macro("__CUDACC__")

View File

@ -54,7 +54,6 @@ NAMESPACE_CHECK(BiCGSTAB);
#include <Grid/algorithms/iterative/SchurRedBlack.h>
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
#include <Grid/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h>
#include <Grid/algorithms/iterative/BiCGSTABMixedPrec.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>

View File

@ -262,7 +262,7 @@ public:
autoView( Tnp_v , (*Tnp), AcceleratorWrite);
autoView( Tnm_v , (*Tnm), AcceleratorWrite);
const int Nsimd = CComplex::Nsimd();
accelerator_forNB(ss, FineGrid->oSites(), Nsimd, {
accelerator_for(ss, FineGrid->oSites(), Nsimd, {
coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
});

View File

@ -264,7 +264,7 @@ public:
auto Tnp_v = Tnp->View();
auto Tnm_v = Tnm->View();
constexpr int Nsimd = vector_type::Nsimd();
accelerator_forNB(ss, in.Grid()->oSites(), Nsimd, {
accelerator_for(ss, in.Grid()->oSites(), Nsimd, {
coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
});
@ -292,7 +292,6 @@ public:
template<class Field>
class ChebyshevLanczos : public Chebyshev<Field> {
private:
std::vector<RealD> Coeffs;
int order;
RealD alpha;

View File

@ -49,7 +49,6 @@ NAMESPACE_BEGIN(Grid);
Integer TotalInnerIterations; //Number of inner CG iterations
Integer TotalOuterIterations; //Number of restarts
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
RealD TrueResidual;
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
LinearFunction<FieldF> *guesser;
@ -69,7 +68,6 @@ NAMESPACE_BEGIN(Grid);
}
void operator() (const FieldD &src_d_in, FieldD &sol_d){
std::cout << GridLogMessage << "MixedPrecisionConjugateGradient: Starting mixed precision CG with outer tolerance " << Tolerance << " and inner tolerance " << InnerTolerance << std::endl;
TotalInnerIterations = 0;
GridStopWatch TotalTimer;
@ -82,11 +80,6 @@ NAMESPACE_BEGIN(Grid);
RealD stop = src_norm * Tolerance*Tolerance;
GridBase* DoublePrecGrid = src_d_in.Grid();
//Generate precision change workspaces
precisionChangeWorkspace wk_dp_from_sp(DoublePrecGrid, SinglePrecGrid);
precisionChangeWorkspace wk_sp_from_dp(SinglePrecGrid, DoublePrecGrid);
FieldD tmp_d(DoublePrecGrid);
tmp_d.Checkerboard() = cb;
@ -104,7 +97,6 @@ NAMESPACE_BEGIN(Grid);
FieldF sol_f(SinglePrecGrid);
sol_f.Checkerboard() = cb;
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting initial inner CG with tolerance " << inner_tol << std::endl;
ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations);
CG_f.ErrorOnNoConverge = false;
@ -128,7 +120,7 @@ NAMESPACE_BEGIN(Grid);
while(norm * inner_tol * inner_tol < stop) inner_tol *= 2; // inner_tol = sqrt(stop/norm) ??
PrecChangeTimer.Start();
precisionChange(src_f, src_d, wk_sp_from_dp);
precisionChange(src_f, src_d);
PrecChangeTimer.Stop();
sol_f = Zero();
@ -138,7 +130,6 @@ NAMESPACE_BEGIN(Grid);
(*guesser)(src_f, sol_f);
//Inner CG
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " << outer_iter << " starting inner CG with tolerance " << inner_tol << std::endl;
CG_f.Tolerance = inner_tol;
InnerCGtimer.Start();
CG_f(Linop_f, src_f, sol_f);
@ -147,7 +138,7 @@ NAMESPACE_BEGIN(Grid);
//Convert sol back to double and add to double prec solution
PrecChangeTimer.Start();
precisionChange(tmp_d, sol_f, wk_dp_from_sp);
precisionChange(tmp_d, sol_f);
PrecChangeTimer.Stop();
axpy(sol_d, 1.0, tmp_d, sol_d);
@ -159,7 +150,6 @@ NAMESPACE_BEGIN(Grid);
ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations);
CG_d(Linop_d, src_d_in, sol_d);
TotalFinalStepIterations = CG_d.IterationsToComplete;
TrueResidual = CG_d.TrueResidual;
TotalTimer.Stop();
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl;

View File

@ -52,7 +52,7 @@ public:
MultiShiftFunction shifts;
std::vector<RealD> TrueResidualShift;
ConjugateGradientMultiShift(Integer maxit, const MultiShiftFunction &_shifts) :
ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) :
MaxIterations(maxit),
shifts(_shifts)
{
@ -182,9 +182,6 @@ public:
for(int s=0;s<nshift;s++) {
axpby(psi[s],0.,-bs[s]*alpha[s],src,src);
}
std::cout << GridLogIterative << "ConjugateGradientMultiShift: initial rn (|src|^2) =" << rn << " qq (|MdagM src|^2) =" << qq << " d ( dot(src, [MdagM + m_0]src) ) =" << d << " c=" << c << std::endl;
///////////////////////////////////////
// Timers

View File

@ -1,411 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShift.h
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Christopher Kelly <ckelly@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H
#define GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H
NAMESPACE_BEGIN(Grid);
//CK 2020: A variant of the multi-shift conjugate gradient with the matrix multiplication in single precision.
//The residual is stored in single precision, but the search directions and solution are stored in double precision.
//Every update_freq iterations the residual is corrected in double precision.
//For safety the a final regular CG is applied to clean up if necessary
//Linop to add shift to input linop, used in cleanup CG
namespace ConjugateGradientMultiShiftMixedPrecSupport{
template<typename Field>
class ShiftedLinop: public LinearOperatorBase<Field>{
public:
LinearOperatorBase<Field> &linop_base;
RealD shift;
ShiftedLinop(LinearOperatorBase<Field> &_linop_base, RealD _shift): linop_base(_linop_base), shift(_shift){}
void OpDiag (const Field &in, Field &out){ assert(0); }
void OpDir (const Field &in, Field &out,int dir,int disp){ assert(0); }
void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); }
void Op (const Field &in, Field &out){ assert(0); }
void AdjOp (const Field &in, Field &out){ assert(0); }
void HermOp(const Field &in, Field &out){
linop_base.HermOp(in, out);
axpy(out, shift, in, out);
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
HermOp(in,out);
ComplexD dot = innerProduct(in,out);
n1=real(dot);
n2=norm2(out);
}
};
};
template<class FieldD, class FieldF,
typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
class ConjugateGradientMultiShiftMixedPrec : public OperatorMultiFunction<FieldD>,
public OperatorFunction<FieldD>
{
public:
using OperatorFunction<FieldD>::operator();
RealD Tolerance;
Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
std::vector<int> IterationsToCompleteShift; // Iterations for this shift
int verbose;
MultiShiftFunction shifts;
std::vector<RealD> TrueResidualShift;
int ReliableUpdateFreq; //number of iterations between reliable updates
GridBase* SinglePrecGrid; //Grid for single-precision fields
LinearOperatorBase<FieldF> &Linop_f; //single precision
ConjugateGradientMultiShiftMixedPrec(Integer maxit, const MultiShiftFunction &_shifts,
GridBase* _SinglePrecGrid, LinearOperatorBase<FieldF> &_Linop_f,
int _ReliableUpdateFreq
) :
MaxIterations(maxit), shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq)
{
verbose=1;
IterationsToCompleteShift.resize(_shifts.order);
TrueResidualShift.resize(_shifts.order);
}
void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, FieldD &psi)
{
GridBase *grid = src.Grid();
int nshift = shifts.order;
std::vector<FieldD> results(nshift,grid);
(*this)(Linop,src,results,psi);
}
void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, std::vector<FieldD> &results, FieldD &psi)
{
int nshift = shifts.order;
(*this)(Linop,src,results);
psi = shifts.norm*src;
for(int i=0;i<nshift;i++){
psi = psi + shifts.residues[i]*results[i];
}
return;
}
void operator() (LinearOperatorBase<FieldD> &Linop_d, const FieldD &src_d, std::vector<FieldD> &psi_d)
{
GridBase *DoublePrecGrid = src_d.Grid();
precisionChangeWorkspace wk_f_from_d(SinglePrecGrid, DoublePrecGrid);
precisionChangeWorkspace wk_d_from_f(DoublePrecGrid, SinglePrecGrid);
////////////////////////////////////////////////////////////////////////
// Convenience references to the info stored in "MultiShiftFunction"
////////////////////////////////////////////////////////////////////////
int nshift = shifts.order;
std::vector<RealD> &mass(shifts.poles); // Make references to array in "shifts"
std::vector<RealD> &mresidual(shifts.tolerances);
std::vector<RealD> alpha(nshift,1.0);
//Double precision search directions
FieldD p_d(DoublePrecGrid);
std::vector<FieldD> ps_d(nshift, DoublePrecGrid);// Search directions (double precision)
FieldD tmp_d(DoublePrecGrid);
FieldD r_d(DoublePrecGrid);
FieldD mmp_d(DoublePrecGrid);
assert(psi_d.size()==nshift);
assert(mass.size()==nshift);
assert(mresidual.size()==nshift);
// dynamic sized arrays on stack; 2d is a pain with vector
RealD bs[nshift];
RealD rsq[nshift];
RealD z[nshift][2];
int converged[nshift];
const int primary =0;
//Primary shift fields CG iteration
RealD a,b,c,d;
RealD cp,bp,qq; //prev
// Matrix mult fields
FieldF r_f(SinglePrecGrid);
FieldF p_f(SinglePrecGrid);
FieldF tmp_f(SinglePrecGrid);
FieldF mmp_f(SinglePrecGrid);
FieldF src_f(SinglePrecGrid);
precisionChange(src_f, src_d, wk_f_from_d);
// Check lightest mass
for(int s=0;s<nshift;s++){
assert( mass[s]>= mass[primary] );
converged[s]=0;
}
// Wire guess to zero
// Residuals "r" are src
// First search direction "p" is also src
cp = norm2(src_d);
// Handle trivial case of zero src.
if( cp == 0. ){
for(int s=0;s<nshift;s++){
psi_d[s] = Zero();
IterationsToCompleteShift[s] = 1;
TrueResidualShift[s] = 0.;
}
return;
}
for(int s=0;s<nshift;s++){
rsq[s] = cp * mresidual[s] * mresidual[s];
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: shift "<< s <<" target resid "<<rsq[s]<<std::endl;
ps_d[s] = src_d;
}
// r and p for primary
r_f=src_f; //residual maintained in single
p_f=src_f;
p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys
//MdagM+m[0]
Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
axpy(mmp_f,mass[0],p_f,mmp_f);
RealD rn = norm2(p_f);
d += rn*mass[0];
b = -cp /d;
// Set up the various shift variables
int iz=0;
z[0][1-iz] = 1.0;
z[0][iz] = 1.0;
bs[0] = b;
for(int s=1;s<nshift;s++){
z[s][1-iz] = 1.0;
z[s][iz] = 1.0/( 1.0 - b*(mass[s]-mass[0]));
bs[s] = b*z[s][iz];
}
// r += b[0] A.p[0]
// c= norm(r)
c=axpy_norm(r_f,b,mmp_f,r_f);
for(int s=0;s<nshift;s++) {
axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d);
}
///////////////////////////////////////
// Timers
///////////////////////////////////////
GridStopWatch AXPYTimer, ShiftTimer, QRTimer, MatrixTimer, SolverTimer, PrecChangeTimer, CleanupTimer;
SolverTimer.Start();
// Iteration loop
int k;
for (k=1;k<=MaxIterations;k++){
a = c /cp;
//Update double precision search direction by residual
PrecChangeTimer.Start();
precisionChange(r_d, r_f, wk_d_from_f);
PrecChangeTimer.Stop();
AXPYTimer.Start();
axpy(p_d,a,p_d,r_d);
for(int s=0;s<nshift;s++){
if ( ! converged[s] ) {
if (s==0){
axpy(ps_d[s],a,ps_d[s],r_d);
} else{
RealD as =a *z[s][iz]*bs[s] /(z[s][1-iz]*b);
axpby(ps_d[s],z[s][iz],as,r_d,ps_d[s]);
}
}
}
AXPYTimer.Stop();
PrecChangeTimer.Start();
precisionChange(p_f, p_d, wk_f_from_d); //get back single prec search direction for linop
PrecChangeTimer.Stop();
cp=c;
MatrixTimer.Start();
Linop_f.HermOp(p_f,mmp_f);
d=real(innerProduct(p_f,mmp_f));
MatrixTimer.Stop();
AXPYTimer.Start();
axpy(mmp_f,mass[0],p_f,mmp_f);
AXPYTimer.Stop();
RealD rn = norm2(p_f);
d += rn*mass[0];
bp=b;
b=-cp/d;
// Toggle the recurrence history
bs[0] = b;
iz = 1-iz;
ShiftTimer.Start();
for(int s=1;s<nshift;s++){
if((!converged[s])){
RealD z0 = z[s][1-iz];
RealD z1 = z[s][iz];
z[s][iz] = z0*z1*bp
/ (b*a*(z1-z0) + z1*bp*(1- (mass[s]-mass[0])*b));
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
}
}
ShiftTimer.Stop();
//Update double precision solutions
AXPYTimer.Start();
for(int s=0;s<nshift;s++){
int ss = s;
if( (!converged[s]) ) {
axpy(psi_d[ss],-bs[s]*alpha[s],ps_d[s],psi_d[ss]);
}
}
//Perform reliable update if necessary; otherwise update residual from single-prec mmp
RealD c_f = axpy_norm(r_f,b,mmp_f,r_f);
AXPYTimer.Stop();
c = c_f;
if(k % ReliableUpdateFreq == 0){
//Replace r with true residual
MatrixTimer.Start();
Linop_d.HermOp(psi_d[0],mmp_d);
MatrixTimer.Stop();
AXPYTimer.Start();
axpy(mmp_d,mass[0],psi_d[0],mmp_d);
RealD c_d = axpy_norm(r_d, -1.0, mmp_d, src_d);
AXPYTimer.Stop();
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec k="<<k<< ", replaced |r|^2 = "<<c_f <<" with |r|^2 = "<<c_d<<std::endl;
PrecChangeTimer.Start();
precisionChange(r_f, r_d, wk_f_from_d);
PrecChangeTimer.Stop();
c = c_d;
}
// Convergence checks
int all_converged = 1;
for(int s=0;s<nshift;s++){
if ( (!converged[s]) ){
IterationsToCompleteShift[s] = k;
RealD css = c * z[s][iz]* z[s][iz];
if(css<rsq[s]){
if ( ! converged[s] )
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
converged[s]=1;
} else {
all_converged=0;
}
}
}
if ( all_converged ){
SolverTimer.Stop();
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: All shifts have converged iteration "<<k<<std::endl;
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: Checking solutions"<<std::endl;
// Check answers
for(int s=0; s < nshift; s++) {
Linop_d.HermOpAndNorm(psi_d[s],mmp_d,d,qq);
axpy(tmp_d,mass[s],psi_d[s],mmp_d);
axpy(r_d,-alpha[s],src_d,tmp_d);
RealD rn = norm2(r_d);
RealD cn = norm2(src_d);
TrueResidualShift[s] = std::sqrt(rn/cn);
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: shift["<<s<<"] true residual "<< TrueResidualShift[s] << " target " << mresidual[s] << std::endl;
//If we have not reached the desired tolerance, do a (mixed precision) CG cleanup
if(rn >= rsq[s]){
CleanupTimer.Start();
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: performing cleanup step for shift " << s << std::endl;
//Setup linear operators for final cleanup
ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldD> Linop_shift_d(Linop_d, mass[s]);
ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldF> Linop_shift_f(Linop_f, mass[s]);
MixedPrecisionConjugateGradient<FieldD,FieldF> cg(mresidual[s], MaxIterations, MaxIterations, SinglePrecGrid, Linop_shift_f, Linop_shift_d);
cg(src_d, psi_d[s]);
TrueResidualShift[s] = cg.TrueResidual;
CleanupTimer.Stop();
}
}
std::cout << GridLogMessage << "ConjugateGradientMultiShiftMixedPrec: Time Breakdown for body"<<std::endl;
std::cout << GridLogMessage << "\tSolver " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\t\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\t\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\t\tShift " << ShiftTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\t\tPrecision Change " << PrecChangeTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tFinal Cleanup " << CleanupTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tSolver+Cleanup " << SolverTimer.Elapsed() + CleanupTimer.Elapsed() << std::endl;
IterationsToComplete = k;
return;
}
}
// ugly hack
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
// assert(0);
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -113,7 +113,43 @@ public:
blockPromote(guess_coarse,guess,subspace);
guess.Checkerboard() = src.Checkerboard();
};
};
void operator()(const std::vector<FineField> &src,std::vector<FineField> &guess) {
int Nevec = (int)evec_coarse.size();
int Nsrc = (int)src.size();
// make temp variables
std::vector<CoarseField> src_coarse(Nsrc,evec_coarse[0].Grid());
std::vector<CoarseField> guess_coarse(Nsrc,evec_coarse[0].Grid());
//Preporcessing
std::cout << GridLogMessage << "Start BlockProject for loop" << std::endl;
for (int j=0;j<Nsrc;j++)
{
guess_coarse[j] = Zero();
std::cout << GridLogMessage << "BlockProject iter: " << j << std::endl;
blockProject(src_coarse[j],src[j],subspace);
}
//deflation set up for eigen vector batchsize 1 and source batch size equal number of sources
std::cout << GridLogMessage << "Start ProjectAccum for loop" << std::endl;
for (int i=0;i<Nevec;i++)
{
std::cout << GridLogMessage << "ProjectAccum Nvec: " << i << std::endl;
const CoarseField & tmp = evec_coarse[i];
for (int j=0;j<Nsrc;j++)
{
axpy(guess_coarse[j],TensorRemove(innerProduct(tmp,src_coarse[j])) / eval_coarse[i],tmp,guess_coarse[j]);
}
}
//postprocessing
std::cout << GridLogMessage << "Start BlockPromote for loop" << std::endl;
for (int j=0;j<Nsrc;j++)
{
std::cout << GridLogMessage << "BlockProject iter: " << j << std::endl;
blockPromote(guess_coarse[j],guess[j],subspace);
guess[j].Checkerboard() = src[j].Checkerboard();
}
};
};

View File

@ -44,7 +44,6 @@ public:
int, MinRes); // Must restart
};
//This class is the input parameter class for some testing programs
struct LocalCoherenceLanczosParams : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
@ -156,7 +155,6 @@ public:
_coarse_relax_tol(coarse_relax_tol)
{ };
//evalMaxApprox: approximation of largest eval of the fine Chebyshev operator (suitably wrapped by block projection)
int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
{
CoarseField v(B);
@ -183,16 +181,8 @@ public:
if( (vv<eresid*eresid) ) conv = 1;
return conv;
}
//This function is called at the end of the coarse grid Lanczos. It promotes the coarse eigenvector 'B' to the fine grid,
//applies a smoother to the result then computes the computes the *fine grid* eigenvalue (output as 'eval').
//evalMaxApprox should be the approximation of the largest eval of the fine Hermop. However when this function is called by IRL it actually passes the largest eval of the *Chebyshev* operator (as this is the max approx used for the TestConvergence above)
//As the largest eval of the Chebyshev is typically several orders of magnitude larger this makes the convergence test pass even when it should not.
//We therefore ignore evalMaxApprox here and use a value of 1.0 (note this value is already used by TestCoarse)
int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
{
evalMaxApprox = 1.0; //cf above
GridBase *FineGrid = _subspace[0].Grid();
int checkerboard = _subspace[0].Checkerboard();
FineField fB(FineGrid);fB.Checkerboard() =checkerboard;
@ -211,13 +201,13 @@ public:
eval = vnum/vden;
fv -= eval*fB;
RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0);
if ( j > nbasis ) eresid = eresid*_coarse_relax_tol;
std::cout.precision(13);
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv << " target " << eresid*eresid
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
<<std::endl;
if ( j > nbasis ) eresid = eresid*_coarse_relax_tol;
if( (vv<eresid*eresid) ) return 1;
return 0;
}
@ -295,10 +285,6 @@ public:
evals_coarse.resize(0);
};
//The block inner product is the inner product on the fine grid locally summed over the blocks
//to give a Lattice<Scalar> on the coarse grid. This function orthnormalizes the fine-grid subspace
//vectors under the block inner product. This step must be performed after computing the fine grid
//eigenvectors and before computing the coarse grid eigenvectors.
void Orthogonalise(void ) {
CoarseScalar InnerProd(_CoarseGrid);
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
@ -342,8 +328,6 @@ public:
}
}
//While this method serves to check the coarse eigenvectors, it also recomputes the eigenvalues from the smoothed reconstructed eigenvectors
//hence the smoother can be tuned after running the coarse Lanczos by using a different smoother here
void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)
{
assert(evals_fine.size() == nbasis);
@ -392,31 +376,25 @@ public:
evals_fine.resize(nbasis);
subspace.resize(nbasis,_FineGrid);
}
//cheby_op: Parameters of the fine grid Chebyshev polynomial used for the Lanczos acceleration
//cheby_smooth: Parameters of a separate Chebyshev polynomial used after the Lanczos has completed to smooth out high frequency noise in the reconstructed fine grid eigenvectors prior to computing the eigenvalue
//relax: Reconstructed eigenvectors (post smoothing) are naturally not as precise as true eigenvectors. This factor acts as a multiplier on the stopping condition when determining whether the results satisfy the user provided stopping condition
void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
int Nstop, int Nk, int Nm,RealD resid,
RealD MaxIt, RealD betastp, int MinRes)
{
Chebyshev<FineField> Cheby(cheby_op); //Chebyshev of fine operator on fine grid
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,subspace); //Fine operator on coarse grid with intermediate fine grid conversion
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace); //Chebyshev of fine operator on coarse grid with intermediate fine grid conversion
Chebyshev<FineField> Cheby(cheby_op);
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,subspace);
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace);
//////////////////////////////////////////////////////////////////////////////////////////////////
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
//////////////////////////////////////////////////////////////////////////////////////////////////
Chebyshev<FineField> ChebySmooth(cheby_smooth); //lower order Chebyshev of fine operator on fine grid used to smooth regenerated eigenvectors
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
Chebyshev<FineField> ChebySmooth(cheby_smooth);
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
evals_coarse.resize(Nm);
evec_coarse.resize(Nm,_CoarseGrid);
CoarseField src(_CoarseGrid); src=1.0;
//Note the "tester" here is also responsible for generating the fine grid eigenvalues which are output into the "evals_coarse" array
ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
int Nconv=0;
IRL.calc(evals_coarse,evec_coarse,src,Nconv,false);
@ -427,14 +405,6 @@ public:
std::cout << i << " Coarse eval = " << evals_coarse[i] << std::endl;
}
}
//Get the fine eigenvector 'i' by reconstruction
void getFineEvecEval(FineField &evec, RealD &eval, const int i) const{
blockPromote(evec_coarse[i],evec,subspace);
eval = evals_coarse[i];
}
};
NAMESPACE_END(Grid);

View File

@ -29,8 +29,6 @@ template<class Field> class PowerMethod
RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
RealD vden = norm2(src_n);
RealD na = vnum/vden;
std::cout << GridLogIterative << "PowerMethod: Current approximation of largest eigenvalue " << na << std::endl;
if ( (fabs(evalMaxApprox/na - 1.0) < 0.001) || (i==_MAX_ITER_EST_-1) ) {
evalMaxApprox = na;

View File

@ -40,7 +40,7 @@ void MemoryManager::PrintBytes(void)
//////////////////////////////////////////////////////////////////////
MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax];
int MemoryManager::Victim[MemoryManager::NallocType];
int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 8, 2, 8, 2, 8 };
int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 8, 8, 16, 8, 16 };
uint64_t MemoryManager::CacheBytes[MemoryManager::NallocType];
//////////////////////////////////////////////////////////////////////
// Actual allocation and deallocation utils

View File

@ -36,6 +36,11 @@ NAMESPACE_BEGIN(Grid);
#define GRID_ALLOC_SMALL_LIMIT (4096)
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
#define FILE_LINE __FILE__ ":" TOSTRING(__LINE__)
#define AUDIT(a) MemoryManager::Audit(FILE_LINE)
/*Pinning pages is costly*/
////////////////////////////////////////////////////////////////////////////
// Advise the LatticeAccelerator class
@ -92,8 +97,9 @@ private:
static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim,uint64_t &cbytes) ;
static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache,uint64_t &cbytes) ;
static void PrintBytes(void);
public:
static void PrintBytes(void);
static void Audit(std::string s);
static void Init(void);
static void InitMessage(void);
static void *AcceleratorAllocate(size_t bytes);
@ -113,6 +119,8 @@ private:
static uint64_t DeviceToHostBytes;
static uint64_t HostToDeviceXfer;
static uint64_t DeviceToHostXfer;
static uint64_t DeviceEvictions;
static uint64_t DeviceDestroy;
private:
#ifndef GRID_UVM
@ -170,6 +178,7 @@ private:
public:
static void Print(void);
static void PrintAll(void);
static void PrintState( void* CpuPtr);
static int isOpen (void* CpuPtr);
static void ViewClose(void* CpuPtr,ViewMode mode);

View File

@ -3,8 +3,13 @@
#warning "Using explicit device memory copies"
NAMESPACE_BEGIN(Grid);
//#define dprintf(...) printf ( __VA_ARGS__ ); fflush(stdout);
#define dprintf(...)
#define MAXLINE 512
static char print_buffer [ MAXLINE ];
#define mprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogMemory << print_buffer;
#define dprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogMemory << print_buffer;
//#define dprintf(...)
////////////////////////////////////////////////////////////
@ -23,6 +28,8 @@ uint64_t MemoryManager::HostToDeviceBytes;
uint64_t MemoryManager::DeviceToHostBytes;
uint64_t MemoryManager::HostToDeviceXfer;
uint64_t MemoryManager::DeviceToHostXfer;
uint64_t MemoryManager::DeviceEvictions;
uint64_t MemoryManager::DeviceDestroy;
////////////////////////////////////
// Priority ordering for unlocked entries
@ -104,15 +111,17 @@ void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
///////////////////////////////////////////////////////////
assert(AccCache.state!=Empty);
dprintf("MemoryManager: Discard(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
mprintf("MemoryManager: Discard(%lx) %lx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
assert(AccCache.accLock==0);
assert(AccCache.cpuLock==0);
assert(AccCache.CpuPtr!=(uint64_t)NULL);
if(AccCache.AccPtr) {
AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
DeviceDestroy++;
DeviceBytes -=AccCache.bytes;
LRUremove(AccCache);
dprintf("MemoryManager: Free(%llx) LRU %lld Total %lld\n",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes);
AccCache.AccPtr=(uint64_t) NULL;
dprintf("MemoryManager: Free(%lx) LRU %ld Total %ld\n",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes);
}
uint64_t CpuPtr = AccCache.CpuPtr;
EntryErase(CpuPtr);
@ -121,26 +130,36 @@ void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
void MemoryManager::Evict(AcceleratorViewEntry &AccCache)
{
///////////////////////////////////////////////////////////////////////////
// Make CPU consistent, remove from Accelerator, remove entry
// Cannot be locked. If allocated must be in LRU pool.
// Make CPU consistent, remove from Accelerator, remove from LRU, LEAVE CPU only entry
// Cannot be acclocked. If allocated must be in LRU pool.
//
// Nov 2022... Felix issue: Allocating two CpuPtrs, can have an entry in LRU-q with CPUlock.
// and require to evict the AccPtr copy. Eviction was a mistake in CpuViewOpen
// but there is a weakness where CpuLock entries are attempted for erase
// Take these OUT LRU queue when CPU locked?
// Cannot take out the table as cpuLock data is important.
///////////////////////////////////////////////////////////////////////////
assert(AccCache.state!=Empty);
dprintf("MemoryManager: Evict(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
assert(AccCache.accLock==0);
assert(AccCache.cpuLock==0);
mprintf("MemoryManager: Evict cpu %lx acc %lx cpuLock %ld accLock %ld\n",
(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr,
(uint64_t)AccCache.cpuLock,(uint64_t)AccCache.accLock);
assert(AccCache.accLock==0); // Cannot evict so logic bomb
assert(AccCache.CpuPtr!=(uint64_t)NULL);
if(AccCache.state==AccDirty) {
Flush(AccCache);
}
assert(AccCache.CpuPtr!=(uint64_t)NULL);
if(AccCache.AccPtr) {
AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
DeviceBytes -=AccCache.bytes;
LRUremove(AccCache);
dprintf("MemoryManager: Free(%llx) footprint now %lld \n",(uint64_t)AccCache.AccPtr,DeviceBytes);
AccCache.AccPtr=(uint64_t)NULL;
AccCache.state=CpuDirty; // CPU primary now
DeviceBytes -=AccCache.bytes;
dprintf("MemoryManager: Free(%lx) footprint now %ld \n",(uint64_t)AccCache.AccPtr,DeviceBytes);
}
uint64_t CpuPtr = AccCache.CpuPtr;
EntryErase(CpuPtr);
// uint64_t CpuPtr = AccCache.CpuPtr;
DeviceEvictions++;
// EntryErase(CpuPtr);
}
void MemoryManager::Flush(AcceleratorViewEntry &AccCache)
{
@ -150,7 +169,7 @@ void MemoryManager::Flush(AcceleratorViewEntry &AccCache)
assert(AccCache.AccPtr!=(uint64_t)NULL);
assert(AccCache.CpuPtr!=(uint64_t)NULL);
acceleratorCopyFromDevice((void *)AccCache.AccPtr,(void *)AccCache.CpuPtr,AccCache.bytes);
dprintf("MemoryManager: Flush %llx -> %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
mprintf("MemoryManager: Flush %lx -> %lx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
DeviceToHostBytes+=AccCache.bytes;
DeviceToHostXfer++;
AccCache.state=Consistent;
@ -165,7 +184,7 @@ void MemoryManager::Clone(AcceleratorViewEntry &AccCache)
AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes);
DeviceBytes+=AccCache.bytes;
}
dprintf("MemoryManager: Clone %llx <- %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
mprintf("MemoryManager: Clone %lx <- %lx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
acceleratorCopyToDevice((void *)AccCache.CpuPtr,(void *)AccCache.AccPtr,AccCache.bytes);
HostToDeviceBytes+=AccCache.bytes;
HostToDeviceXfer++;
@ -191,6 +210,7 @@ void MemoryManager::CpuDiscard(AcceleratorViewEntry &AccCache)
void MemoryManager::ViewClose(void* Ptr,ViewMode mode)
{
if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){
dprintf("AcceleratorViewClose %lx\n",(uint64_t)Ptr);
AcceleratorViewClose((uint64_t)Ptr);
} else if( (mode==CpuRead)||(mode==CpuWrite)){
CpuViewClose((uint64_t)Ptr);
@ -202,6 +222,7 @@ void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvis
{
uint64_t CpuPtr = (uint64_t)_CpuPtr;
if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){
dprintf("AcceleratorViewOpen %lx\n",(uint64_t)CpuPtr);
return (void *) AcceleratorViewOpen(CpuPtr,bytes,mode,hint);
} else if( (mode==CpuRead)||(mode==CpuWrite)){
return (void *)CpuViewOpen(CpuPtr,bytes,mode,hint);
@ -212,13 +233,16 @@ void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvis
}
void MemoryManager::EvictVictims(uint64_t bytes)
{
assert(bytes<DeviceMaxBytes);
while(bytes+DeviceLRUBytes > DeviceMaxBytes){
if ( DeviceLRUBytes > 0){
assert(LRU.size()>0);
uint64_t victim = LRU.back();
uint64_t victim = LRU.back(); // From the LRU
auto AccCacheIterator = EntryLookup(victim);
auto & AccCache = AccCacheIterator->second;
Evict(AccCache);
} else {
return;
}
}
}
@ -241,11 +265,12 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
assert(AccCache.cpuLock==0); // Programming error
if(AccCache.state!=Empty) {
dprintf("ViewOpen found entry %llx %llx : %lld %lld\n",
dprintf("ViewOpen found entry %lx %lx : %ld %ld accLock %ld\n",
(uint64_t)AccCache.CpuPtr,
(uint64_t)CpuPtr,
(uint64_t)AccCache.bytes,
(uint64_t)bytes);
(uint64_t)bytes,
(uint64_t)AccCache.accLock);
assert(AccCache.CpuPtr == CpuPtr);
assert(AccCache.bytes ==bytes);
}
@ -280,6 +305,7 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
AccCache.state = Consistent; // Empty + AccRead => Consistent
}
AccCache.accLock= 1;
dprintf("Copied Empty entry into device accLock= %d\n",AccCache.accLock);
} else if(AccCache.state==CpuDirty ){
if(mode==AcceleratorWriteDiscard) {
CpuDiscard(AccCache);
@ -292,28 +318,30 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
AccCache.state = Consistent; // CpuDirty + AccRead => Consistent
}
AccCache.accLock++;
dprintf("Copied CpuDirty entry into device accLock %d\n",AccCache.accLock);
dprintf("CpuDirty entry into device ++accLock= %d\n",AccCache.accLock);
} else if(AccCache.state==Consistent) {
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
AccCache.state = AccDirty; // Consistent + AcceleratorWrite=> AccDirty
else
AccCache.state = Consistent; // Consistent + AccRead => Consistent
AccCache.accLock++;
dprintf("Consistent entry into device accLock %d\n",AccCache.accLock);
dprintf("Consistent entry into device ++accLock= %d\n",AccCache.accLock);
} else if(AccCache.state==AccDirty) {
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
AccCache.state = AccDirty; // AccDirty + AcceleratorWrite=> AccDirty
else
AccCache.state = AccDirty; // AccDirty + AccRead => AccDirty
AccCache.accLock++;
dprintf("AccDirty entry into device accLock %d\n",AccCache.accLock);
dprintf("AccDirty entry ++accLock= %d\n",AccCache.accLock);
} else {
assert(0);
}
// If view is opened on device remove from LRU
assert(AccCache.accLock>0);
// If view is opened on device must remove from LRU
if(AccCache.LRU_valid==1){
// must possibly remove from LRU as now locked on GPU
dprintf("AccCache entry removed from LRU \n");
LRUremove(AccCache);
}
@ -334,10 +362,12 @@ void MemoryManager::AcceleratorViewClose(uint64_t CpuPtr)
assert(AccCache.accLock>0);
AccCache.accLock--;
// Move to LRU queue if not locked and close on device
if(AccCache.accLock==0) {
dprintf("AccleratorViewClose %lx AccLock decremented to %ld move to LRU queue\n",(uint64_t)CpuPtr,(uint64_t)AccCache.accLock);
LRUinsert(AccCache);
} else {
dprintf("AccleratorViewClose %lx AccLock decremented to %ld\n",(uint64_t)CpuPtr,(uint64_t)AccCache.accLock);
}
}
void MemoryManager::CpuViewClose(uint64_t CpuPtr)
@ -374,9 +404,10 @@ uint64_t MemoryManager::CpuViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,V
auto AccCacheIterator = EntryLookup(CpuPtr);
auto & AccCache = AccCacheIterator->second;
if (!AccCache.AccPtr) {
EvictVictims(bytes);
}
// CPU doesn't need to free space
// if (!AccCache.AccPtr) {
// EvictVictims(bytes);
// }
assert((mode==CpuRead)||(mode==CpuWrite));
assert(AccCache.accLock==0); // Programming error
@ -430,20 +461,28 @@ void MemoryManager::NotifyDeletion(void *_ptr)
void MemoryManager::Print(void)
{
PrintBytes();
std::cout << GridLogDebug << "--------------------------------------------" << std::endl;
std::cout << GridLogDebug << "Memory Manager " << std::endl;
std::cout << GridLogDebug << "--------------------------------------------" << std::endl;
std::cout << GridLogDebug << DeviceBytes << " bytes allocated on device " << std::endl;
std::cout << GridLogDebug << DeviceLRUBytes<< " bytes evictable on device " << std::endl;
std::cout << GridLogDebug << DeviceMaxBytes<< " bytes max on device " << std::endl;
std::cout << GridLogDebug << HostToDeviceXfer << " transfers to device " << std::endl;
std::cout << GridLogDebug << DeviceToHostXfer << " transfers from device " << std::endl;
std::cout << GridLogDebug << HostToDeviceBytes<< " bytes transfered to device " << std::endl;
std::cout << GridLogDebug << DeviceToHostBytes<< " bytes transfered from device " << std::endl;
std::cout << GridLogDebug << AccViewTable.size()<< " vectors " << LRU.size()<<" evictable"<< std::endl;
std::cout << GridLogDebug << "--------------------------------------------" << std::endl;
std::cout << GridLogDebug << "CpuAddr\t\tAccAddr\t\tState\t\tcpuLock\taccLock\tLRU_valid "<<std::endl;
std::cout << GridLogDebug << "--------------------------------------------" << std::endl;
std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
std::cout << GridLogMessage << "Memory Manager " << std::endl;
std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
std::cout << GridLogMessage << DeviceBytes << " bytes allocated on device " << std::endl;
std::cout << GridLogMessage << DeviceLRUBytes<< " bytes evictable on device " << std::endl;
std::cout << GridLogMessage << DeviceMaxBytes<< " bytes max on device " << std::endl;
std::cout << GridLogMessage << HostToDeviceXfer << " transfers to device " << std::endl;
std::cout << GridLogMessage << DeviceToHostXfer << " transfers from device " << std::endl;
std::cout << GridLogMessage << HostToDeviceBytes<< " bytes transfered to device " << std::endl;
std::cout << GridLogMessage << DeviceToHostBytes<< " bytes transfered from device " << std::endl;
std::cout << GridLogMessage << DeviceEvictions << " Evictions from device " << std::endl;
std::cout << GridLogMessage << DeviceDestroy << " Destroyed vectors on device " << std::endl;
std::cout << GridLogMessage << AccViewTable.size()<< " vectors " << LRU.size()<<" evictable"<< std::endl;
std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
}
void MemoryManager::PrintAll(void)
{
Print();
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
std::cout << GridLogMessage << "CpuAddr\t\tAccAddr\t\tState\t\tcpuLock\taccLock\tLRU_valid "<<std::endl;
std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
for(auto it=AccViewTable.begin();it!=AccViewTable.end();it++){
auto &AccCache = it->second;
@ -453,13 +492,13 @@ void MemoryManager::Print(void)
if ( AccCache.state==AccDirty ) str = std::string("AccDirty");
if ( AccCache.state==Consistent)str = std::string("Consistent");
std::cout << GridLogDebug << "0x"<<std::hex<<AccCache.CpuPtr<<std::dec
std::cout << GridLogMessage << "0x"<<std::hex<<AccCache.CpuPtr<<std::dec
<< "\t0x"<<std::hex<<AccCache.AccPtr<<std::dec<<"\t" <<str
<< "\t" << AccCache.cpuLock
<< "\t" << AccCache.accLock
<< "\t" << AccCache.LRU_valid<<std::endl;
}
std::cout << GridLogDebug << "--------------------------------------------" << std::endl;
std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
};
int MemoryManager::isOpen (void* _CpuPtr)
@ -473,6 +512,61 @@ int MemoryManager::isOpen (void* _CpuPtr)
return 0;
}
}
void MemoryManager::Audit(std::string s)
{
uint64_t CpuBytes=0;
uint64_t AccBytes=0;
uint64_t LruBytes1=0;
uint64_t LruBytes2=0;
uint64_t LruCnt=0;
uint64_t LockedBytes=0;
std::cout << " Memory Manager::Audit() from "<<s<<std::endl;
for(auto it=LRU.begin();it!=LRU.end();it++){
uint64_t cpuPtr = *it;
assert(EntryPresent(cpuPtr));
auto AccCacheIterator = EntryLookup(cpuPtr);
auto & AccCache = AccCacheIterator->second;
LruBytes2+=AccCache.bytes;
assert(AccCache.LRU_valid==1);
assert(AccCache.LRU_entry==it);
}
std::cout << " Memory Manager::Audit() LRU queue matches table entries "<<std::endl;
for(auto it=AccViewTable.begin();it!=AccViewTable.end();it++){
auto &AccCache = it->second;
std::string str;
if ( AccCache.state==Empty ) str = std::string("Empty");
if ( AccCache.state==CpuDirty ) str = std::string("CpuDirty");
if ( AccCache.state==AccDirty ) str = std::string("AccDirty");
if ( AccCache.state==Consistent)str = std::string("Consistent");
CpuBytes+=AccCache.bytes;
if( AccCache.AccPtr ) AccBytes+=AccCache.bytes;
if( AccCache.LRU_valid ) LruBytes1+=AccCache.bytes;
if( AccCache.LRU_valid ) LruCnt++;
if ( AccCache.cpuLock || AccCache.accLock ) {
assert(AccCache.LRU_valid==0);
std::cout << GridLogError << s<< "\n\t 0x"<<std::hex<<AccCache.CpuPtr<<std::dec
<< "\t0x"<<std::hex<<AccCache.AccPtr<<std::dec<<"\t" <<str
<< "\t cpuLock " << AccCache.cpuLock
<< "\t accLock " << AccCache.accLock
<< "\t LRUvalid " << AccCache.LRU_valid<<std::endl;
}
assert( AccCache.cpuLock== 0 ) ;
assert( AccCache.accLock== 0 ) ;
}
std::cout << " Memory Manager::Audit() no locked table entries "<<std::endl;
assert(LruBytes1==LruBytes2);
assert(LruBytes1==DeviceLRUBytes);
std::cout << " Memory Manager::Audit() evictable bytes matches sum over table "<<std::endl;
assert(AccBytes==DeviceBytes);
std::cout << " Memory Manager::Audit() device bytes matches sum over table "<<std::endl;
assert(LruCnt == LRU.size());
std::cout << " Memory Manager::Audit() LRU entry count matches "<<std::endl;
}
void MemoryManager::PrintState(void* _CpuPtr)
{
@ -489,8 +583,8 @@ void MemoryManager::PrintState(void* _CpuPtr)
if ( AccCache.state==EvictNext) str = std::string("EvictNext");
std::cout << GridLogMessage << "CpuAddr\t\tAccAddr\t\tState\t\tcpuLock\taccLock\tLRU_valid "<<std::endl;
std::cout << GridLogMessage << "0x"<<std::hex<<AccCache.CpuPtr<<std::dec
<< "\t0x"<<std::hex<<AccCache.AccPtr<<std::dec<<"\t" <<str
std::cout << GridLogMessage << "\tx"<<std::hex<<AccCache.CpuPtr<<std::dec
<< "\tx"<<std::hex<<AccCache.AccPtr<<std::dec<<"\t" <<str
<< "\t" << AccCache.cpuLock
<< "\t" << AccCache.accLock
<< "\t" << AccCache.LRU_valid<<std::endl;

View File

@ -12,7 +12,10 @@ uint64_t MemoryManager::HostToDeviceBytes;
uint64_t MemoryManager::DeviceToHostBytes;
uint64_t MemoryManager::HostToDeviceXfer;
uint64_t MemoryManager::DeviceToHostXfer;
uint64_t MemoryManager::DeviceEvictions;
uint64_t MemoryManager::DeviceDestroy;
void MemoryManager::Audit(std::string s){};
void MemoryManager::ViewClose(void* AccPtr,ViewMode mode){};
void *MemoryManager::ViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint){ return CpuPtr; };
int MemoryManager::isOpen (void* CpuPtr) { return 0;}
@ -21,6 +24,7 @@ void MemoryManager::PrintState(void* CpuPtr)
std::cout << GridLogMessage << "Host<->Device memory movement not currently managed by Grid." << std::endl;
};
void MemoryManager::Print(void){};
void MemoryManager::PrintAll(void){};
void MemoryManager::NotifyDeletion(void *ptr){};
NAMESPACE_END(Grid);

View File

@ -392,9 +392,9 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes);
}
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
this->StencilSendToRecvFromComplete(list,dir);
}
// if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
// this->StencilSendToRecvFromComplete(list,dir);
// }
return off_node_bytes;
}

File diff suppressed because it is too large Load Diff

View File

@ -129,7 +129,7 @@ public:
auto exprCopy = expr;
ExpressionViewOpen(exprCopy);
auto me = View(AcceleratorWriteDiscard);
auto me = View(AcceleratorWrite);
accelerator_for(ss,me.size(),vobj::Nsimd(),{
auto tmp = eval(ss,exprCopy);
coalescedWrite(me[ss],tmp);
@ -152,7 +152,7 @@ public:
auto exprCopy = expr;
ExpressionViewOpen(exprCopy);
auto me = View(AcceleratorWriteDiscard);
auto me = View(AcceleratorWrite);
accelerator_for(ss,me.size(),vobj::Nsimd(),{
auto tmp = eval(ss,exprCopy);
coalescedWrite(me[ss],tmp);
@ -174,7 +174,7 @@ public:
this->checkerboard=cb;
auto exprCopy = expr;
ExpressionViewOpen(exprCopy);
auto me = View(AcceleratorWriteDiscard);
auto me = View(AcceleratorWrite);
accelerator_for(ss,me.size(),vobj::Nsimd(),{
auto tmp = eval(ss,exprCopy);
coalescedWrite(me[ss],tmp);
@ -245,7 +245,7 @@ public:
///////////////////////////////////////////
// user defined constructor
///////////////////////////////////////////
Lattice(GridBase *grid,ViewMode mode=AcceleratorWriteDiscard) {
Lattice(GridBase *grid,ViewMode mode=AcceleratorWrite) {
this->_grid = grid;
resize(this->_grid->oSites());
assert((((uint64_t)&this->_odata[0])&0xF) ==0);
@ -288,7 +288,7 @@ public:
typename std::enable_if<!std::is_same<robj,vobj>::value,int>::type i=0;
conformable(*this,r);
this->checkerboard = r.Checkerboard();
auto me = View(AcceleratorWriteDiscard);
auto me = View(AcceleratorWrite);
auto him= r.View(AcceleratorRead);
accelerator_for(ss,me.size(),vobj::Nsimd(),{
coalescedWrite(me[ss],him(ss));
@ -303,7 +303,7 @@ public:
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
this->checkerboard = r.Checkerboard();
conformable(*this,r);
auto me = View(AcceleratorWriteDiscard);
auto me = View(AcceleratorWrite);
auto him= r.View(AcceleratorRead);
accelerator_for(ss,me.size(),vobj::Nsimd(),{
coalescedWrite(me[ss],him(ss));

View File

@ -29,6 +29,19 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid);
template<class vobj> void DumpSliceNorm(std::string s,Lattice<vobj> &f,int mu=-1)
{
auto ff = localNorm2(f);
if ( mu==-1 ) mu = f.Grid()->Nd()-1;
typedef typename vobj::tensor_reduced normtype;
typedef typename normtype::scalar_object scalar;
std::vector<scalar> sff;
sliceSum(ff,sff,mu);
for(int t=0;t<sff.size();t++){
std::cout << s<<" "<<t<<" "<<sff[t]<<std::endl;
}
}
template<class vobj> uint32_t crc(Lattice<vobj> & buf)
{
autoView( buf_v , buf, CpuRead);

View File

@ -28,6 +28,9 @@ Author: Christoph Lehner <christoph@lhnr.de>
#if defined(GRID_CUDA)||defined(GRID_HIP)
#include <Grid/lattice/Lattice_reduction_gpu.h>
#endif
#if defined(GRID_SYCL)
#include <Grid/lattice/Lattice_reduction_sycl.h>
#endif
NAMESPACE_BEGIN(Grid);
@ -127,7 +130,7 @@ inline Double max(const Double *arg, Integer osites)
template<class vobj>
inline typename vobj::scalar_object sum(const vobj *arg, Integer osites)
{
#if defined(GRID_CUDA)||defined(GRID_HIP)
#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
return sum_gpu(arg,osites);
#else
return sum_cpu(arg,osites);
@ -136,25 +139,50 @@ inline typename vobj::scalar_object sum(const vobj *arg, Integer osites)
template<class vobj>
inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites)
{
#if defined(GRID_CUDA)||defined(GRID_HIP)
#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
return sumD_gpu(arg,osites);
#else
return sumD_cpu(arg,osites);
#endif
}
template<class vobj>
inline typename vobj::scalar_objectD sumD_large(const vobj *arg, Integer osites)
{
#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
return sumD_gpu_large(arg,osites);
#else
return sumD_cpu(arg,osites);
#endif
}
template<class vobj>
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
{
#if defined(GRID_CUDA)||defined(GRID_HIP)
Integer osites = arg.Grid()->oSites();
#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
typename vobj::scalar_object ssum;
autoView( arg_v, arg, AcceleratorRead);
ssum= sum_gpu(&arg_v[0],osites);
#else
autoView(arg_v, arg, CpuRead);
auto ssum= sum_cpu(&arg_v[0],osites);
#endif
arg.Grid()->GlobalSum(ssum);
return ssum;
}
template<class vobj>
inline typename vobj::scalar_object sum_large(const Lattice<vobj> &arg)
{
#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
autoView( arg_v, arg, AcceleratorRead);
Integer osites = arg.Grid()->oSites();
auto ssum= sum_gpu(&arg_v[0],osites);
auto ssum= sum_gpu_large(&arg_v[0],osites);
#else
autoView(arg_v, arg, CpuRead);
Integer osites = arg.Grid()->oSites();
auto ssum= sum_cpu(&arg_v[0],osites);
#endif
#endif
arg.Grid()->GlobalSum(ssum);
return ssum;
}
@ -210,11 +238,10 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
typedef decltype(innerProductD(vobj(),vobj())) inner_t;
Vector<inner_t> inner_tmp(sites);
auto inner_tmp_v = &inner_tmp[0];
{
autoView( left_v , left, AcceleratorRead);
autoView( right_v,right, AcceleratorRead);
// This code could read coalesce
// GPU - SIMT lane compliance...
accelerator_for( ss, sites, 1,{
auto x_l = left_v[ss];

View File

@ -23,7 +23,7 @@ unsigned int nextPow2(Iterator x) {
}
template <class Iterator>
void getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator &threads, Iterator &blocks) {
int getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator &threads, Iterator &blocks) {
int device;
#ifdef GRID_CUDA
@ -37,13 +37,13 @@ void getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator
Iterator sharedMemPerBlock = gpu_props[device].sharedMemPerBlock;
Iterator maxThreadsPerBlock = gpu_props[device].maxThreadsPerBlock;
Iterator multiProcessorCount = gpu_props[device].multiProcessorCount;
/*
std::cout << GridLogDebug << "GPU has:" << std::endl;
std::cout << GridLogDebug << "\twarpSize = " << warpSize << std::endl;
std::cout << GridLogDebug << "\tsharedMemPerBlock = " << sharedMemPerBlock << std::endl;
std::cout << GridLogDebug << "\tmaxThreadsPerBlock = " << maxThreadsPerBlock << std::endl;
std::cout << GridLogDebug << "\tmultiProcessorCount = " << multiProcessorCount << std::endl;
*/
if (warpSize != WARP_SIZE) {
std::cout << GridLogError << "The warp size of the GPU in use does not match the warp size set when compiling Grid." << std::endl;
exit(EXIT_FAILURE);
@ -53,12 +53,12 @@ void getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator
threads = warpSize;
if ( threads*sizeofsobj > sharedMemPerBlock ) {
std::cout << GridLogError << "The object is too large for the shared memory." << std::endl;
exit(EXIT_FAILURE);
return 0;
}
while( 2*threads*sizeofsobj < sharedMemPerBlock && 2*threads <= maxThreadsPerBlock ) threads *= 2;
// keep all the streaming multiprocessors busy
blocks = nextPow2(multiProcessorCount);
return 1;
}
template <class sobj, class Iterator>
@ -198,7 +198,7 @@ __global__ void reduceKernel(const vobj *lat, sobj *buffer, Iterator n) {
// Possibly promote to double and sum
/////////////////////////////////////////////////////////////////////////////////////////////////////////
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites)
inline typename vobj::scalar_objectD sumD_gpu_small(const vobj *lat, Integer osites)
{
typedef typename vobj::scalar_objectD sobj;
typedef decltype(lat) Iterator;
@ -207,7 +207,9 @@ inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites)
Integer size = osites*nsimd;
Integer numThreads, numBlocks;
getNumBlocksAndThreads(size, sizeof(sobj), numThreads, numBlocks);
int ok = getNumBlocksAndThreads(size, sizeof(sobj), numThreads, numBlocks);
assert(ok);
Integer smemSize = numThreads * sizeof(sobj);
Vector<sobj> buffer(numBlocks);
@ -218,6 +220,54 @@ inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites)
auto result = buffer_v[0];
return result;
}
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu_large(const vobj *lat, Integer osites)
{
typedef typename vobj::vector_type vector;
typedef typename vobj::scalar_typeD scalarD;
typedef typename vobj::scalar_objectD sobj;
sobj ret;
scalarD *ret_p = (scalarD *)&ret;
const int words = sizeof(vobj)/sizeof(vector);
Vector<vector> buffer(osites);
vector *dat = (vector *)lat;
vector *buf = &buffer[0];
iScalar<vector> *tbuf =(iScalar<vector> *) &buffer[0];
for(int w=0;w<words;w++) {
accelerator_for(ss,osites,1,{
buf[ss] = dat[ss*words+w];
});
ret_p[w] = sumD_gpu_small(tbuf,osites);
}
return ret;
}
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites)
{
typedef typename vobj::vector_type vector;
typedef typename vobj::scalar_typeD scalarD;
typedef typename vobj::scalar_objectD sobj;
sobj ret;
Integer nsimd= vobj::Nsimd();
Integer size = osites*nsimd;
Integer numThreads, numBlocks;
int ok = getNumBlocksAndThreads(size, sizeof(sobj), numThreads, numBlocks);
if ( ok ) {
ret = sumD_gpu_small(lat,osites);
} else {
ret = sumD_gpu_large(lat,osites);
}
return ret;
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////
// Return as same precision as input performing reduction in double precision though
/////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -230,6 +280,13 @@ inline typename vobj::scalar_object sum_gpu(const vobj *lat, Integer osites)
return result;
}
template <class vobj>
inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osites)
{
typedef typename vobj::scalar_object sobj;
sobj result;
result = sumD_gpu_large(lat,osites);
return result;
}
NAMESPACE_END(Grid);

View File

@ -0,0 +1,125 @@
NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////////////////////////////////////////////////////
// Possibly promote to double and sum
/////////////////////////////////////////////////////////////////////////////////////////////////////////
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer osites)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_objectD sobjD;
sobj *mysum =(sobj *) malloc_shared(sizeof(sobj),*theGridAccelerator);
sobj identity; zeroit(identity);
sobj ret ;
Integer nsimd= vobj::Nsimd();
theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
auto Reduction = cl::sycl::reduction(mysum,identity,std::plus<>());
cgh.parallel_for(cl::sycl::range<1>{osites},
Reduction,
[=] (cl::sycl::id<1> item, auto &sum) {
auto osite = item[0];
sum +=Reduce(lat[osite]);
});
});
theGridAccelerator->wait();
ret = mysum[0];
free(mysum,*theGridAccelerator);
sobjD dret; convertType(dret,ret);
return dret;
}
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu_large(const vobj *lat, Integer osites)
{
return sumD_gpu_tensor(lat,osites);
}
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu_small(const vobj *lat, Integer osites)
{
return sumD_gpu_large(lat,osites);
}
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites)
{
return sumD_gpu_large(lat,osites);
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////
// Return as same precision as input performing reduction in double precision though
/////////////////////////////////////////////////////////////////////////////////////////////////////////
template <class vobj>
inline typename vobj::scalar_object sum_gpu(const vobj *lat, Integer osites)
{
typedef typename vobj::scalar_object sobj;
sobj result;
result = sumD_gpu(lat,osites);
return result;
}
template <class vobj>
inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osites)
{
typedef typename vobj::scalar_object sobj;
sobj result;
result = sumD_gpu_large(lat,osites);
return result;
}
NAMESPACE_END(Grid);
/*
template<class Double> Double svm_reduce(Double *vec,uint64_t L)
{
Double sumResult; zeroit(sumResult);
Double *d_sum =(Double *)cl::sycl::malloc_shared(sizeof(Double),*theGridAccelerator);
Double identity; zeroit(identity);
theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
auto Reduction = cl::sycl::reduction(d_sum,identity,std::plus<>());
cgh.parallel_for(cl::sycl::range<1>{L},
Reduction,
[=] (cl::sycl::id<1> index, auto &sum) {
sum +=vec[index];
});
});
theGridAccelerator->wait();
Double ret = d_sum[0];
free(d_sum,*theGridAccelerator);
std::cout << " svm_reduce finished "<<L<<" sites sum = " << ret <<std::endl;
return ret;
}
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu_repack(const vobj *lat, Integer osites)
{
typedef typename vobj::vector_type vector;
typedef typename vobj::scalar_type scalar;
typedef typename vobj::scalar_typeD scalarD;
typedef typename vobj::scalar_objectD sobjD;
sobjD ret;
scalarD *ret_p = (scalarD *)&ret;
const int nsimd = vobj::Nsimd();
const int words = sizeof(vobj)/sizeof(vector);
Vector<scalar> buffer(osites*nsimd);
scalar *buf = &buffer[0];
vector *dat = (vector *)lat;
for(int w=0;w<words;w++) {
accelerator_for(ss,osites,nsimd,{
int lane = acceleratorSIMTlane(nsimd);
buf[ss*nsimd+lane] = dat[ss*words+w].getlane(lane);
});
//Precision change at this point is to late to gain precision
ret_p[w] = svm_reduce(buf,nsimd*osites);
}
return ret;
}
*/

View File

@ -32,9 +32,8 @@
#include <random>
#ifdef RNG_SITMO
#include <Grid/random/sitmo_prng_engine.hpp>
#include <Grid/sitmo_rng/sitmo_prng_engine.hpp>
#endif
#include <Grid/random/gaussian.h>
#if defined(RNG_SITMO)
#define RNG_FAST_DISCARD
@ -143,8 +142,8 @@ public:
std::vector<RngEngine> _generators;
std::vector<std::uniform_real_distribution<RealD> > _uniform;
std::vector<Grid::gaussian_distribution<RealD> > _gaussian;
// std::vector<std::discrete_distribution<int32_t> > _bernoulli;
std::vector<std::normal_distribution<RealD> > _gaussian;
std::vector<std::discrete_distribution<int32_t> > _bernoulli;
std::vector<std::uniform_int_distribution<uint32_t> > _uid;
///////////////////////
@ -244,8 +243,8 @@ public:
GridSerialRNG() : GridRNGbase() {
_generators.resize(1);
_uniform.resize(1,std::uniform_real_distribution<RealD>{0,1});
_gaussian.resize(1,gaussian_distribution<RealD>(0.0,1.0) );
// _bernoulli.resize(1,std::discrete_distribution<int32_t>{1,1});
_gaussian.resize(1,std::normal_distribution<RealD>(0.0,1.0) );
_bernoulli.resize(1,std::discrete_distribution<int32_t>{1,1});
_uid.resize(1,std::uniform_int_distribution<uint32_t>() );
}
@ -358,8 +357,8 @@ public:
_generators.resize(_vol);
_uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1});
_gaussian.resize(_vol,gaussian_distribution<RealD>(0.0,1.0) );
// _bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1});
_gaussian.resize(_vol,std::normal_distribution<RealD>(0.0,1.0) );
_bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1});
_uid.resize(_vol,std::uniform_int_distribution<uint32_t>() );
}
@ -516,11 +515,11 @@ public:
template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l) { rng.fill(l,rng._uniform); }
template <class vobj> inline void gaussian(GridParallelRNG &rng,Lattice<vobj> &l) { rng.fill(l,rng._gaussian); }
//template <class vobj> inline void bernoulli(GridParallelRNG &rng,Lattice<vobj> &l){ rng.fill(l,rng._bernoulli);}
template <class vobj> inline void bernoulli(GridParallelRNG &rng,Lattice<vobj> &l){ rng.fill(l,rng._bernoulli);}
template <class sobj> inline void random(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._uniform ); }
template <class sobj> inline void gaussian(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._gaussian ); }
//template <class sobj> inline void bernoulli(GridSerialRNG &rng,sobj &l){ rng.fill(l,rng._bernoulli); }
template <class sobj> inline void bernoulli(GridSerialRNG &rng,sobj &l){ rng.fill(l,rng._bernoulli); }
NAMESPACE_END(Grid);
#endif

View File

@ -0,0 +1,126 @@
NAMESPACE_BEGIN(Grid);
// If NOT CUDA or HIP -- we should provide
// -- atomicAdd(float *,float)
// -- atomicAdd(double *,double)
//
// Augment CUDA with complex atomics
#if !defined(GRID_HIP) || !defined(GRID_CUDA)
inline void atomicAdd(float *acc,float elem)
{
*acc += elem;
}
inline void atomicAdd(double *acc,double elem)
{
*acc += elem;
}
#endif
inline void atomicAdd(ComplexD *accum,ComplexD & elem)
{
double *a_p = (double *)accum;
double *e_p = (double *)&elem;
for(int w=0;w<2;w++){
atomicAdd(&a_p[w],e_p[w]);
}
}
inline void atomicAdd(ComplexF *accum,ComplexF & elem)
{
float *a_p = (float *)accum;
float *e_p = (float *)&elem;
for(int w=0;w<2;w++){
atomicAdd(&a_p[w],e_p[w]);
}
}
// Augment CUDA with vobj atomics
template<class vobj> accelerator_inline void atomicAdd(vobj *accum, vobj & elem)
{
typedef typename vobj::scalar_type scalar_type;
scalar_type *a_p= (scalar_type *)accum;
scalar_type *e_p= (scalar_type *)& elem;
for(int w=0;w<vobj::Nsimd();w++){
atomicAdd(&a_p[w],e_p[w]);
}
}
// Atomics based slice sum
template<class vobj> inline void sliceSumGpu(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_object::scalar_type scalar_type;
GridBase *grid = Data.Grid();
assert(grid!=NULL);
const int Nd = grid->_ndimension;
const int Nsimd = grid->Nsimd();
assert(orthogdim >= 0);
assert(orthogdim < Nd);
int fd=grid->_fdimensions[orthogdim];
int ld=grid->_ldimensions[orthogdim];
int rd=grid->_rdimensions[orthogdim];
// Move to device memory and copy in / out
Vector<vobj> lvSum(rd); // will locally sum vectors first
Vector<sobj> lsSum(ld,Zero()); // sum across these down to scalars
ExtractBuffer<sobj> extracted(Nsimd); // splitting the SIMD
result.resize(fd); // And then global sum to return the same vector to every node
for(int r=0;r<rd;r++){
lvSum[r]=Zero();
}
int e1= grid->_slice_nblock[orthogdim];
int e2= grid->_slice_block [orthogdim];
int stride=grid->_slice_stride[orthogdim];
// sum over reduced dimension planes, breaking out orthog dir
// Parallel over orthog direction
autoView( Data_v, Data, AcceleratorRead);
auto lvSum_p=&lvSum[0];
int ostride = grid->_ostride[orthogdim];
accelerator_for( ree,rd*e1*e2,1, {
int b = ree%e2;
int re= ree/e2;
int n=re%e1;
int r=re/e1;
int so=r*ostride;
int ss=so+n*stride+b;
atomicAdd(&lvSum_p[r],Data_v[ss]);
});
// Sum across simd lanes in the plane, breaking out orthog dir.
Coordinate icoor(Nd);
for(int rt=0;rt<rd;rt++){
extract(lvSum[rt],extracted);
for(int idx=0;idx<Nsimd;idx++){
grid->iCoorFromIindex(icoor,idx);
int ldx =rt+icoor[orthogdim]*rd;
lsSum[ldx]=lsSum[ldx]+extracted[idx];
}
}
// sum over nodes.
for(int t=0;t<fd;t++){
int pt = t/ld; // processor plane
int lt = t%ld;
if ( pt == grid->_processor_coor[orthogdim] ) {
result[t]=lsSum[lt];
} else {
result[t]=Zero();
}
}
scalar_type * ptr = (scalar_type *) &result[0];
int words = fd*sizeof(sobj)/sizeof(scalar_type);
grid->GlobalSumVector(ptr, words);
}
NAMESPACE_END(Grid);

View File

@ -855,7 +855,7 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int
template<class vobj>
void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine)
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
{
typedef typename vobj::scalar_object sobj;
@ -1080,95 +1080,53 @@ vectorizeFromRevLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
});
}
//The workspace for a precision change operation allowing for the reuse of the mapping to save time on subsequent calls
class precisionChangeWorkspace{
std::pair<Integer,Integer>* fmap_device; //device pointer
public:
precisionChangeWorkspace(GridBase *out_grid, GridBase *in_grid){
//Build a map between the sites and lanes of the output field and the input field as we cannot use the Grids on the device
assert(out_grid->Nd() == in_grid->Nd());
for(int d=0;d<out_grid->Nd();d++){
assert(out_grid->FullDimensions()[d] == in_grid->FullDimensions()[d]);
}
int Nsimd_out = out_grid->Nsimd();
std::vector<Coordinate> out_icorrs(out_grid->Nsimd()); //reuse these
for(int lane=0; lane < out_grid->Nsimd(); lane++)
out_grid->iCoorFromIindex(out_icorrs[lane], lane);
std::vector<std::pair<Integer,Integer> > fmap_host(out_grid->lSites()); //lsites = osites*Nsimd
thread_for(out_oidx,out_grid->oSites(),{
Coordinate out_ocorr;
out_grid->oCoorFromOindex(out_ocorr, out_oidx);
Coordinate lcorr; //the local coordinate (common to both in and out as full coordinate)
for(int out_lane=0; out_lane < Nsimd_out; out_lane++){
out_grid->InOutCoorToLocalCoor(out_ocorr, out_icorrs[out_lane], lcorr);
//int in_oidx = in_grid->oIndex(lcorr), in_lane = in_grid->iIndex(lcorr);
//Note oIndex and OcorrFromOindex (and same for iIndex) are not inverse for checkerboarded lattice, the former coordinates being defined on the full lattice and the latter on the reduced lattice
//Until this is fixed we need to circumvent the problem locally. Here I will use the coordinates defined on the reduced lattice for simplicity
int in_oidx = 0, in_lane = 0;
for(int d=0;d<in_grid->_ndimension;d++){
in_oidx += in_grid->_ostride[d] * ( lcorr[d] % in_grid->_rdimensions[d] );
in_lane += in_grid->_istride[d] * ( lcorr[d] / in_grid->_rdimensions[d] );
}
fmap_host[out_lane + Nsimd_out*out_oidx] = std::pair<Integer,Integer>( in_oidx, in_lane );
}
});
//Copy the map to the device (if we had a way to tell if an accelerator is in use we could avoid this copy for CPU-only machines)
size_t fmap_bytes = out_grid->lSites() * sizeof(std::pair<Integer,Integer>);
fmap_device = (std::pair<Integer,Integer>*)acceleratorAllocDevice(fmap_bytes);
acceleratorCopyToDevice(fmap_host.data(), fmap_device, fmap_bytes);
}
//Prevent moving or copying
precisionChangeWorkspace(const precisionChangeWorkspace &r) = delete;
precisionChangeWorkspace(precisionChangeWorkspace &&r) = delete;
precisionChangeWorkspace &operator=(const precisionChangeWorkspace &r) = delete;
precisionChangeWorkspace &operator=(precisionChangeWorkspace &&r) = delete;
std::pair<Integer,Integer> const* getMap() const{ return fmap_device; }
~precisionChangeWorkspace(){
acceleratorFreeDevice(fmap_device);
}
};
//Convert a lattice of one precision to another. The input workspace contains the mapping data.
template<class VobjOut, class VobjIn>
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in, const precisionChangeWorkspace &workspace){
static_assert( std::is_same<typename VobjOut::DoublePrecision, typename VobjIn::DoublePrecision>::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same
out.Checkerboard() = in.Checkerboard();
constexpr int Nsimd_out = VobjOut::Nsimd();
std::pair<Integer,Integer> const* fmap_device = workspace.getMap();
//Do the copy/precision change
autoView( out_v , out, AcceleratorWrite);
autoView( in_v , in, AcceleratorRead);
accelerator_for(out_oidx, out.Grid()->oSites(), 1,{
std::pair<Integer,Integer> const* fmap_osite = fmap_device + out_oidx*Nsimd_out;
for(int out_lane=0; out_lane < Nsimd_out; out_lane++){
int in_oidx = fmap_osite[out_lane].first;
int in_lane = fmap_osite[out_lane].second;
copyLane(out_v[out_oidx], out_lane, in_v[in_oidx], in_lane);
}
});
}
//Convert a Lattice from one precision to another
//Generate the workspace in place; if multiple calls with the same mapping are performed, consider pregenerating the workspace and reusing
template<class VobjOut, class VobjIn>
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
precisionChangeWorkspace workspace(out.Grid(), in.Grid());
precisionChange(out, in, workspace);
}
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in)
{
assert(out.Grid()->Nd() == in.Grid()->Nd());
for(int d=0;d<out.Grid()->Nd();d++){
assert(out.Grid()->FullDimensions()[d] == in.Grid()->FullDimensions()[d]);
}
out.Checkerboard() = in.Checkerboard();
GridBase *in_grid=in.Grid();
GridBase *out_grid = out.Grid();
typedef typename VobjOut::scalar_object SobjOut;
typedef typename VobjIn::scalar_object SobjIn;
int ndim = out.Grid()->Nd();
int out_nsimd = out_grid->Nsimd();
std::vector<Coordinate > out_icoor(out_nsimd);
for(int lane=0; lane < out_nsimd; lane++){
out_icoor[lane].resize(ndim);
out_grid->iCoorFromIindex(out_icoor[lane], lane);
}
std::vector<SobjOut> in_slex_conv(in_grid->lSites());
unvectorizeToLexOrdArray(in_slex_conv, in);
autoView( out_v , out, CpuWrite);
thread_for(out_oidx,out_grid->oSites(),{
Coordinate out_ocoor(ndim);
out_grid->oCoorFromOindex(out_ocoor, out_oidx);
ExtractPointerArray<SobjOut> ptrs(out_nsimd);
Coordinate lcoor(out_grid->Nd());
for(int lane=0; lane < out_nsimd; lane++){
for(int mu=0;mu<ndim;mu++)
lcoor[mu] = out_ocoor[mu] + out_grid->_rdimensions[mu]*out_icoor[lane][mu];
int llex; Lexicographic::IndexFromCoor(lcoor, llex, out_grid->_ldimensions);
ptrs[lane] = &in_slex_conv[llex];
}
merge(out_v[out_oidx], ptrs, 0);
});
}
////////////////////////////////////////////////////////////////////////////////
// Communicate between grids

View File

@ -65,32 +65,40 @@ GridLogger GridLogSolver (1, "Solver", GridLogColours, "NORMAL");
GridLogger GridLogError (1, "Error" , GridLogColours, "RED");
GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW");
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL");
GridLogger GridLogMemory (1, "Memory", GridLogColours, "NORMAL");
GridLogger GridLogTracing(1, "Tracing", GridLogColours, "NORMAL");
GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE");
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
GridLogger GridLogDslash (1, "Dslash", GridLogColours, "BLUE");
GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE");
GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE");
GridLogger GridLogHMC (1, "HMC", GridLogColours, "BLUE");
void GridLogConfigure(std::vector<std::string> &logstreams) {
GridLogError.Active(0);
GridLogError.Active(1);
GridLogWarning.Active(0);
GridLogMessage.Active(1); // at least the messages should be always on
GridLogMemory.Active(0);
GridLogTracing.Active(0);
GridLogIterative.Active(0);
GridLogDebug.Active(0);
GridLogPerformance.Active(0);
GridLogDslash.Active(0);
GridLogIntegrator.Active(1);
GridLogColours.Active(0);
GridLogHMC.Active(1);
for (int i = 0; i < logstreams.size(); i++) {
if (logstreams[i] == std::string("Error")) GridLogError.Active(1);
if (logstreams[i] == std::string("Tracing")) GridLogTracing.Active(1);
if (logstreams[i] == std::string("Memory")) GridLogMemory.Active(1);
if (logstreams[i] == std::string("Warning")) GridLogWarning.Active(1);
if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0);
if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1);
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1);
if (logstreams[i] == std::string("NoIntegrator")) GridLogIntegrator.Active(0);
if (logstreams[i] == std::string("NoHMC")) GridLogHMC.Active(0);
if (logstreams[i] == std::string("Dslash")) GridLogDslash.Active(1);
if (logstreams[i] == std::string("NoIntegrator"))GridLogIntegrator.Active(0);
if (logstreams[i] == std::string("NoHMC")) GridLogHMC.Active(0);
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);
}
}

View File

@ -138,7 +138,8 @@ public:
stream << std::setw(log.topWidth);
}
stream << log.topName << log.background()<< " : ";
stream << log.colour() << std::left;
// stream << log.colour() << std::left;
stream << std::left;
if (log.chanWidth > 0)
{
stream << std::setw(log.chanWidth);
@ -153,9 +154,9 @@ public:
stream << log.evidence()
<< now << log.background() << " : " ;
}
stream << log.colour();
// stream << log.colour();
stream << std::right;
stream.flags(f);
return stream;
} else {
return devnull;
@ -180,9 +181,12 @@ extern GridLogger GridLogWarning;
extern GridLogger GridLogMessage;
extern GridLogger GridLogDebug ;
extern GridLogger GridLogPerformance;
extern GridLogger GridLogDslash;
extern GridLogger GridLogIterative ;
extern GridLogger GridLogIntegrator ;
extern GridLogger GridLogHMC;
extern GridLogger GridLogMemory;
extern GridLogger GridLogTracing;
extern Colours GridLogColours;
std::string demangle(const char* name) ;

View File

@ -31,6 +31,7 @@ directory
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <map>
#include <pwd.h>
@ -654,7 +655,8 @@ class IldgWriter : public ScidacWriter {
// Fill ILDG header data struct
//////////////////////////////////////////////////////
ildgFormat ildgfmt ;
ildgfmt.field = std::string("su3gauge");
const std::string stNC = std::to_string( Nc ) ;
ildgfmt.field = std::string("su"+stNC+"gauge");
if ( format == std::string("IEEE32BIG") ) {
ildgfmt.precision = 32;
@ -871,7 +873,8 @@ class IldgReader : public GridLimeReader {
} else {
assert(found_ildgFormat);
assert ( ildgFormat_.field == std::string("su3gauge") );
const std::string stNC = std::to_string( Nc ) ;
assert ( ildgFormat_.field == std::string("su"+stNC+"gauge") );
///////////////////////////////////////////////////////////////////////////////////////
// Populate our Grid metadata as best we can
@ -879,7 +882,7 @@ class IldgReader : public GridLimeReader {
std::ostringstream vers; vers << ildgFormat_.version;
FieldMetaData_.hdr_version = vers.str();
FieldMetaData_.data_type = std::string("4D_SU3_GAUGE_3X3");
FieldMetaData_.data_type = std::string("4D_SU"+stNC+"_GAUGE_"+stNC+"x"+stNC);
FieldMetaData_.nd=4;
FieldMetaData_.dimension.resize(4);

View File

@ -6,8 +6,8 @@
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Jamie Hudspith <renwick.james.hudspth@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -182,8 +182,8 @@ class GaugeStatistics
public:
void operator()(Lattice<vLorentzColourMatrixD> & data,FieldMetaData &header)
{
header.link_trace=WilsonLoops<Impl>::linkTrace(data);
header.plaquette =WilsonLoops<Impl>::avgPlaquette(data);
header.link_trace = WilsonLoops<Impl>::linkTrace(data);
header.plaquette = WilsonLoops<Impl>::avgPlaquette(data);
}
};
typedef GaugeStatistics<PeriodicGimplD> PeriodicGaugeStatistics;
@ -203,20 +203,24 @@ template<> inline void PrepareMetaData<vLorentzColourMatrixD>(Lattice<vLorentzCo
//////////////////////////////////////////////////////////////////////
inline void reconstruct3(LorentzColourMatrix & cm)
{
const int x=0;
const int y=1;
const int z=2;
assert( Nc < 4 && Nc > 1 ) ;
for(int mu=0;mu<Nd;mu++){
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
#if Nc == 2
cm(mu)()(1,0) = -adj(cm(mu)()(0,y)) ;
cm(mu)()(1,1) = adj(cm(mu)()(0,x)) ;
#else
const int x=0 , y=1 , z=2 ; // a little disinenuous labelling
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
#endif
}
}
////////////////////////////////////////////////////////////////////////////////
// Some data types for intermediate storage
////////////////////////////////////////////////////////////////////////////////
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, Nd >;
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, Nc-1>, Nd >;
typedef iLorentzColour2x3<Complex> LorentzColour2x3;
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
@ -278,7 +282,6 @@ struct GaugeSimpleMunger{
template <class fobj, class sobj>
struct GaugeSimpleUnmunger {
void operator()(sobj &in, fobj &out) {
for (int mu = 0; mu < Nd; mu++) {
for (int i = 0; i < Nc; i++) {
@ -317,8 +320,8 @@ template<class fobj,class sobj>
struct Gauge3x2munger{
void operator() (fobj &in,sobj &out){
for(int mu=0;mu<Nd;mu++){
for(int i=0;i<2;i++){
for(int j=0;j<3;j++){
for(int i=0;i<Nc-1;i++){
for(int j=0;j<Nc;j++){
out(mu)()(i,j) = in(mu)(i)(j);
}}
}
@ -330,8 +333,8 @@ template<class fobj,class sobj>
struct Gauge3x2unmunger{
void operator() (sobj &in,fobj &out){
for(int mu=0;mu<Nd;mu++){
for(int i=0;i<2;i++){
for(int j=0;j<3;j++){
for(int i=0;i<Nc-1;i++){
for(int j=0;j<Nc;j++){
out(mu)(i)(j) = in(mu)()(i,j);
}}
}

View File

@ -9,6 +9,7 @@
Author: Matt Spraggs <matthew.spraggs@gmail.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Jamie Hudspith <renwick.james.hudspth@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -30,6 +31,8 @@
#ifndef GRID_NERSC_IO_H
#define GRID_NERSC_IO_H
#include <string>
NAMESPACE_BEGIN(Grid);
using namespace Grid;
@ -39,10 +42,8 @@ using namespace Grid;
////////////////////////////////////////////////////////////////////////////////
class NerscIO : public BinaryIO {
public:
typedef Lattice<vLorentzColourMatrixD> GaugeField;
// Enable/disable exiting if the plaquette in the header does not match the value computed (default true)
static bool & exitOnReadPlaquetteMismatch(){ static bool v=true; return v; }
typedef Lattice<vLorentzColourMatrixD> GaugeField;
static inline void truncate(std::string file){
std::ofstream fout(file,std::ios::out);
@ -147,15 +148,17 @@ public:
std::string format(header.floating_point);
int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64") || format == std::string("IEEE64LITTLE"));
const int ieee32big = (format == std::string("IEEE32BIG"));
const int ieee32 = (format == std::string("IEEE32"));
const int ieee64big = (format == std::string("IEEE64BIG"));
const int ieee64 = (format == std::string("IEEE64") || \
format == std::string("IEEE64LITTLE"));
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
// depending on datatype, set up munger;
// munger is a function of <floating point, Real, data_type>
if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
const std::string stNC = std::to_string( Nc ) ;
if ( header.data_type == std::string("4D_SU"+stNC+"_GAUGE") ) {
if ( ieee32 || ieee32big ) {
BinaryIO::readLatticeObject<vLorentzColourMatrixD, LorentzColour2x3F>
(Umu,file,Gauge3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format,
@ -166,7 +169,7 @@ public:
(Umu,file,Gauge3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format,
nersc_csum,scidac_csuma,scidac_csumb);
}
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) {
} else if ( header.data_type == std::string("4D_SU"+stNC+"_GAUGE_"+stNC+"x"+stNC) ) {
if ( ieee32 || ieee32big ) {
BinaryIO::readLatticeObject<vLorentzColourMatrixD,LorentzColourMatrixF>
(Umu,file,GaugeSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format,
@ -200,7 +203,7 @@ public:
std::cerr << " nersc_csum " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl;
exit(0);
}
if(exitOnReadPlaquetteMismatch()) assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
assert(nersc_csum == header.checksum );
@ -211,27 +214,29 @@ public:
template<class GaugeStats=PeriodicGaugeStatistics>
static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu,
std::string file,
std::string ens_label = std::string("DWF"))
std::string ens_label = std::string("DWF"),
std::string ens_id = std::string("UKQCD"),
unsigned int sequence_number = 1)
{
writeConfiguration(Umu,file,0,1,ens_label);
writeConfiguration(Umu,file,0,1,ens_label,ens_id,sequence_number);
}
template<class GaugeStats=PeriodicGaugeStatistics>
static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu,
std::string file,
int two_row,
int bits32,
std::string ens_label = std::string("DWF"))
std::string ens_label = std::string("DWF"),
std::string ens_id = std::string("UKQCD"),
unsigned int sequence_number = 1)
{
typedef vLorentzColourMatrixD vobj;
typedef typename vobj::scalar_object sobj;
FieldMetaData header;
///////////////////////////////////////////
// Following should become arguments
///////////////////////////////////////////
header.sequence_number = 1;
header.ensemble_id = std::string("UKQCD");
header.sequence_number = sequence_number;
header.ensemble_id = ens_id;
header.ensemble_label = ens_label;
header.hdr_version = "1.0" ;
typedef LorentzColourMatrixD fobj3D;
typedef LorentzColour2x3D fobj2D;
@ -245,10 +250,14 @@ public:
uint64_t offset;
// Sod it -- always write 3x3 double
header.floating_point = std::string("IEEE64BIG");
header.data_type = std::string("4D_SU3_GAUGE_3x3");
GaugeSimpleUnmunger<fobj3D,sobj> munge;
// Sod it -- always write NcxNc double
header.floating_point = std::string("IEEE64BIG");
const std::string stNC = std::to_string( Nc ) ;
if( two_row ) {
header.data_type = std::string("4D_SU" + stNC + "_GAUGE" );
} else {
header.data_type = std::string("4D_SU" + stNC + "_GAUGE_" + stNC + "x" + stNC );
}
if ( grid->IsBoss() ) {
truncate(file);
offset = writeHeader(header,file);
@ -256,8 +265,15 @@ public:
grid->Broadcast(0,(void *)&offset,sizeof(offset));
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
nersc_csum,scidac_csuma,scidac_csumb);
if( two_row ) {
Gauge3x2unmunger<fobj2D,sobj> munge;
BinaryIO::writeLatticeObject<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point,
nersc_csum,scidac_csuma,scidac_csumb);
} else {
GaugeSimpleUnmunger<fobj3D,sobj> munge;
BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
nersc_csum,scidac_csuma,scidac_csumb);
}
header.checksum = nersc_csum;
if ( grid->IsBoss() ) {
writeHeader(header,file);
@ -289,8 +305,7 @@ public:
header.plaquette=0.0;
MachineCharacteristics(header);
uint64_t offset;
uint64_t offset;
#ifdef RNG_RANLUX
header.floating_point = std::string("UINT64");
header.data_type = std::string("RANLUX48");
@ -330,7 +345,7 @@ public:
GridBase *grid = parallel.Grid();
uint64_t offset = readHeader(file,grid,header);
uint64_t offset = readHeader(file,grid,header);
FieldMetaData clone(header);

View File

@ -27,10 +27,13 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */
#include <Grid/GridCore.h>
#include <Grid/perfmon/PerfCount.h>
#include <Grid/perfmon/Timer.h>
#include <Grid/perfmon/PerfCount.h>
NAMESPACE_BEGIN(Grid);
GridTimePoint theProgramStart = GridClock::now();
#define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16))
#define RawConfig(A,B) (A<<8|B)
const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = {

View File

@ -30,6 +30,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_PERFCOUNT_H
#define GRID_PERFCOUNT_H
#ifndef __SSC_START
#define __SSC_START
#define __SSC_STOP
#endif
#include <sys/time.h>
#include <ctime>
#include <chrono>
@ -72,17 +78,9 @@ static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
inline uint64_t cyclecount(void){
return 0;
}
#define __SSC_MARK(mark) __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(mark):"%ebx")
#define __SSC_STOP __SSC_MARK(0x110)
#define __SSC_START __SSC_MARK(0x111)
#else
#define __SSC_MARK(mark)
#define __SSC_STOP
#define __SSC_START
/*
* cycle counters arch dependent
*/

View File

@ -35,17 +35,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid)
// Dress the output; use std::chrono
// C++11 time facilities better?
inline double usecond(void) {
struct timeval tv;
#ifdef TIMERS_ON
gettimeofday(&tv,NULL);
#endif
return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
}
typedef std::chrono::system_clock GridClock;
//typedef std::chrono::system_clock GridClock;
typedef std::chrono::high_resolution_clock GridClock;
typedef std::chrono::time_point<GridClock> GridTimePoint;
typedef std::chrono::seconds GridSecs;
@ -53,6 +44,15 @@ typedef std::chrono::milliseconds GridMillisecs;
typedef std::chrono::microseconds GridUsecs;
typedef std::chrono::microseconds GridTime;
extern GridTimePoint theProgramStart;
// Dress the output; use std::chrono
// C++11 time facilities better?
inline double usecond(void) {
auto usecs = std::chrono::duration_cast<GridUsecs>(GridClock::now()-theProgramStart);
return 1.0*usecs.count();
}
inline std::ostream& operator<< (std::ostream & stream, const GridSecs & time)
{
stream << time.count()<<" s";

70
Grid/perfmon/Tracing.h Normal file
View File

@ -0,0 +1,70 @@
#pragma once
NAMESPACE_BEGIN(Grid);
#ifdef GRID_TRACING_NVTX
#include <nvToolsExt.h>
class GridTracer {
public:
GridTracer(const char* name) {
nvtxRangePushA(name);
}
~GridTracer() {
nvtxRangePop();
}
};
inline void tracePush(const char *name) { nvtxRangePushA(name); }
inline void tracePop(const char *name) { nvtxRangePop(); }
inline int traceStart(const char *name) { }
inline void traceStop(int ID) { }
#endif
#ifdef GRID_TRACING_ROCTX
#include <roctracer/roctx.h>
class GridTracer {
public:
GridTracer(const char* name) {
roctxRangePushA(name);
std::cout << "roctxRangePush "<<name<<std::endl;
}
~GridTracer() {
roctxRangePop();
std::cout << "roctxRangePop "<<std::endl;
}
};
inline void tracePush(const char *name) { roctxRangePushA(name); }
inline void tracePop(const char *name) { roctxRangePop(); }
inline int traceStart(const char *name) { roctxRangeStart(name); }
inline void traceStop(int ID) { roctxRangeStop(ID); }
#endif
#ifdef GRID_TRACING_TIMER
class GridTracer {
public:
const char *name;
double elapsed;
GridTracer(const char* _name) {
name = _name;
elapsed=-usecond();
}
~GridTracer() {
elapsed+=usecond();
std::cout << GridLogTracing << name << " took " <<elapsed<< " us" <<std::endl;
}
};
inline void tracePush(const char *name) { }
inline void tracePop(const char *name) { }
inline int traceStart(const char *name) { return 0; }
inline void traceStop(int ID) { }
#endif
#ifdef GRID_TRACING_NONE
#define GRID_TRACE(name)
inline void tracePush(const char *name) { }
inline void tracePop(const char *name) { }
inline int traceStart(const char *name) { return 0; }
inline void traceStop(int ID) { }
#else
#define GRID_TRACE(name) GridTracer uniq_name_using_macros##__COUNTER__(name);
#endif
NAMESPACE_END(Grid);

View File

@ -16,8 +16,12 @@
#ifdef __NVCC__
#pragma push
#if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 5)
#pragma nv_diag_suppress declared_but_not_referenced // suppress "function was declared but never referenced warning"
#else
#pragma diag_suppress declared_but_not_referenced // suppress "function was declared but never referenced warning"
#endif
#endif
#include "pugixml.h"

View File

@ -63,7 +63,6 @@ static constexpr int Ngp=2; // gparity index range
#define ColourIndex (2)
#define SpinIndex (1)
#define LorentzIndex (0)
#define GparityFlavourIndex (0)
// Also should make these a named enum type
static constexpr int DaggerNo=0;
@ -88,8 +87,6 @@ template<typename T> struct isCoarsened {
template <typename T> using IfCoarsened = Invoke<std::enable_if< isCoarsened<T>::value,int> > ;
template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ;
const int GparityFlavourTensorIndex = 3; //TensorLevel counts from the bottom!
// ChrisK very keen to add extra space for Gparity doubling.
//
// Also add domain wall index, in a way where Wilson operator
@ -113,10 +110,8 @@ template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVec
template<typename vtype> using iSpinColourSpinColourMatrix = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >;
template<typename vtype> using iGparityFlavourVector = iVector<iScalar<iScalar<vtype> >, Ngp>;
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >;
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
template<typename vtype> using iGparityFlavourMatrix = iMatrix<iScalar<iScalar<vtype> >, Ngp>;
// Spin matrix
typedef iSpinMatrix<Complex > SpinMatrix;
@ -181,16 +176,6 @@ typedef iDoubleStoredColourMatrix<vComplex > vDoubleStoredColourMatrix;
typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF;
typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD;
//G-parity flavour matrix
typedef iGparityFlavourMatrix<Complex> GparityFlavourMatrix;
typedef iGparityFlavourMatrix<ComplexF> GparityFlavourMatrixF;
typedef iGparityFlavourMatrix<ComplexD> GparityFlavourMatrixD;
typedef iGparityFlavourMatrix<vComplex> vGparityFlavourMatrix;
typedef iGparityFlavourMatrix<vComplexF> vGparityFlavourMatrixF;
typedef iGparityFlavourMatrix<vComplexD> vGparityFlavourMatrixD;
// Spin vector
typedef iSpinVector<Complex > SpinVector;
typedef iSpinVector<ComplexF> SpinVectorF;
@ -235,16 +220,6 @@ typedef iHalfSpinColourVector<ComplexD> HalfSpinColourVectorD;
typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector;
typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF;
typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
//G-parity flavour vector
typedef iGparityFlavourVector<Complex > GparityFlavourVector;
typedef iGparityFlavourVector<ComplexF> GparityFlavourVectorF;
typedef iGparityFlavourVector<ComplexD> GparityFlavourVectorD;
typedef iGparityFlavourVector<vComplex > vGparityFlavourVector;
typedef iGparityFlavourVector<vComplexF> vGparityFlavourVectorF;
typedef iGparityFlavourVector<vComplexD> vGparityFlavourVectorD;
// singlets
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
@ -476,9 +451,20 @@ template<class vobj> void pokeLorentz(vobj &lhs,const decltype(peekIndex<Lorentz
// Fermion <-> propagator assignements
//////////////////////////////////////////////
//template <class Prop, class Ferm>
#define FAST_FERM_TO_PROP
template <class Fimpl>
void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::FermionField &f, const int s, const int c)
{
#ifdef FAST_FERM_TO_PROP
autoView(p_v,p,CpuWrite);
autoView(f_v,f,CpuRead);
thread_for(idx,p_v.oSites(),{
for(int ss = 0; ss < Ns; ++ss) {
for(int cc = 0; cc < Fimpl::Dimension; ++cc) {
p_v[idx]()(ss,s)(cc,c) = f_v[idx]()(ss)(cc); // Propagator sink index is LEFT, suitable for left mult by gauge link (e.g.)
}}
});
#else
for(int j = 0; j < Ns; ++j)
{
auto pjs = peekSpin(p, j, s);
@ -490,12 +476,23 @@ void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::Fermio
}
pokeSpin(p, pjs, j, s);
}
#endif
}
//template <class Prop, class Ferm>
template <class Fimpl>
void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::PropagatorField &p, const int s, const int c)
{
#ifdef FAST_FERM_TO_PROP
autoView(p_v,p,CpuRead);
autoView(f_v,f,CpuWrite);
thread_for(idx,p_v.oSites(),{
for(int ss = 0; ss < Ns; ++ss) {
for(int cc = 0; cc < Fimpl::Dimension; ++cc) {
f_v[idx]()(ss)(cc) = p_v[idx]()(ss,s)(cc,c); // LEFT index is copied across for s,c right index
}}
});
#else
for(int j = 0; j < Ns; ++j)
{
auto pjs = peekSpin(p, j, s);
@ -507,6 +504,7 @@ void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::Propagato
}
pokeSpin(f, fj, j);
}
#endif
}
//////////////////////////////////////////////

View File

@ -36,8 +36,7 @@ NAMESPACE_BEGIN(Grid);
// These can move into a params header and be given MacroMagic serialisation
struct GparityWilsonImplParams {
Coordinate twists; //Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.
//mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
Coordinate twists;
GparityWilsonImplParams() : twists(Nd, 0) {};
};
@ -66,8 +65,7 @@ struct StaggeredImplParams {
RealD, tolerance,
int, degree,
int, precision,
int, BoundsCheckFreq,
RealD, BoundsCheckTol);
int, BoundsCheckFreq);
// MaxIter and tolerance, vectors??
@ -78,61 +76,15 @@ struct StaggeredImplParams {
RealD tol = 1.0e-8,
int _degree = 10,
int _precision = 64,
int _BoundsCheckFreq=20,
double _BoundsCheckTol=1e-6)
int _BoundsCheckFreq=20)
: lo(_lo),
hi(_hi),
MaxIter(_maxit),
tolerance(tol),
degree(_degree),
precision(_precision),
BoundsCheckFreq(_BoundsCheckFreq),
BoundsCheckTol(_BoundsCheckTol){};
BoundsCheckFreq(_BoundsCheckFreq){};
};
/*Action parameters for the generalized rational action
The approximation is for (M^dag M)^{1/inv_pow}
where inv_pow is the denominator of the fractional power.
Default inv_pow=2 for square root, making this equivalent to
the OneFlavourRational action
*/
struct RationalActionParams : Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(RationalActionParams,
int, inv_pow,
RealD, lo, //low eigenvalue bound of rational approx
RealD, hi, //high eigenvalue bound of rational approx
int, MaxIter, //maximum iterations in msCG
RealD, action_tolerance, //msCG tolerance in action evaluation
int, action_degree, //rational approx tolerance in action evaluation
RealD, md_tolerance, //msCG tolerance in MD integration
int, md_degree, //rational approx tolerance in MD integration
int, precision, //precision of floating point arithmetic
int, BoundsCheckFreq); //frequency the approximation is tested (with Metropolis degree/tolerance); 0 disables the check
// constructor
RationalActionParams(int _inv_pow = 2,
RealD _lo = 0.0,
RealD _hi = 1.0,
int _maxit = 1000,
RealD _action_tolerance = 1.0e-8,
int _action_degree = 10,
RealD _md_tolerance = 1.0e-8,
int _md_degree = 10,
int _precision = 64,
int _BoundsCheckFreq=20)
: inv_pow(_inv_pow),
lo(_lo),
hi(_hi),
MaxIter(_maxit),
action_tolerance(_action_tolerance),
action_degree(_action_degree),
md_tolerance(_md_tolerance),
md_degree(_md_degree),
precision(_precision),
BoundsCheckFreq(_BoundsCheckFreq){};
};
NAMESPACE_END(Grid);

View File

@ -68,9 +68,16 @@ public:
///////////////////////////////////////////////////////////////
// Support for MADWF tricks
///////////////////////////////////////////////////////////////
RealD Mass(void) { return mass; };
RealD Mass(void) { return (mass_plus + mass_minus) / 2.0; };
RealD MassPlus(void) { return mass_plus; };
RealD MassMinus(void) { return mass_minus; };
void SetMass(RealD _mass) {
mass=_mass;
mass_plus=mass_minus=_mass;
SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs
} ;
void SetMass(RealD _mass_plus, RealD _mass_minus) {
mass_plus=_mass_plus;
mass_minus=_mass_minus;
SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs
} ;
void P(const FermionField &psi, FermionField &chi);
@ -108,7 +115,7 @@ public:
void MeooeDag5D (const FermionField &in, FermionField &out);
// protected:
RealD mass;
RealD mass_plus, mass_minus;
// Save arguments to SetCoefficientsInternal
Vector<Coeff_t> _gamma;

View File

@ -0,0 +1,333 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonCloverFermionImplementation.h
Copyright (C) 2017 - 2022
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Daniel Richtmann <daniel.richtmann@gmail.com>
Author: Mattia Bruno <mattia.bruno@cern.ch>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/Grid.h>
#include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/action/fermion/WilsonCloverHelpers.h>
////////////////////////////////////////////
// Standard Clover
// (4+m0) + csw * clover_term
// Exp Clover
// (4+m0) * exp(csw/(4+m0) clover_term)
// = (4+m0) + csw * clover_term + ...
////////////////////////////////////////////
NAMESPACE_BEGIN(Grid);
//////////////////////////////////
// Generic Standard Clover
//////////////////////////////////
template<class Impl>
class CloverHelpers: public WilsonCloverHelpers<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
INHERIT_CLOVER_TYPES(Impl);
typedef WilsonCloverHelpers<Impl> Helpers;
static void Instantiate(CloverField& CloverTerm, CloverField& CloverTermInv, RealD csw_t, RealD diag_mass) {
GridBase *grid = CloverTerm.Grid();
CloverTerm += diag_mass;
int lvol = grid->lSites();
int DimRep = Impl::Dimension;
{
autoView(CTv,CloverTerm,CpuRead);
autoView(CTIv,CloverTermInv,CpuWrite);
thread_for(site, lvol, {
Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor);
Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero();
peekLocalSite(Qx, CTv, lcoor);
for (int j = 0; j < Ns; j++)
for (int k = 0; k < Ns; k++)
for (int a = 0; a < DimRep; a++)
for (int b = 0; b < DimRep; b++){
auto zz = Qx()(j, k)(a, b);
EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex<double>(zz);
}
EigenInvCloverOp = EigenCloverOp.inverse();
for (int j = 0; j < Ns; j++)
for (int k = 0; k < Ns; k++)
for (int a = 0; a < DimRep; a++)
for (int b = 0; b < DimRep; b++)
Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep);
pokeLocalSite(Qxinv, CTIv, lcoor);
});
}
}
static GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu) {
return Helpers::Cmunu(U, lambda, mu, nu);
}
};
//////////////////////////////////
// Generic Exp Clover
//////////////////////////////////
template<class Impl>
class ExpCloverHelpers: public WilsonCloverHelpers<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
INHERIT_CLOVER_TYPES(Impl);
template <typename vtype> using iImplClover = iScalar<iMatrix<iMatrix<vtype, Impl::Dimension>, Ns>>;
typedef WilsonCloverHelpers<Impl> Helpers;
// Can this be avoided?
static void IdentityTimesC(const CloverField& in, RealD c) {
int DimRep = Impl::Dimension;
autoView(in_v, in, AcceleratorWrite);
accelerator_for(ss, in.Grid()->oSites(), 1, {
for (int sa=0; sa<Ns; sa++)
for (int ca=0; ca<DimRep; ca++)
in_v[ss]()(sa,sa)(ca,ca) = c;
});
}
static int getNMAX(RealD prec, RealD R) {
/* compute stop condition for exponential */
int NMAX=1;
RealD cond=R*R/2.;
while (cond*std::exp(R)>prec) {
NMAX++;
cond*=R/(double)(NMAX+1);
}
return NMAX;
}
static int getNMAX(Lattice<iImplClover<vComplexD>> &t, RealD R) {return getNMAX(1e-12,R);}
static int getNMAX(Lattice<iImplClover<vComplexF>> &t, RealD R) {return getNMAX(1e-6,R);}
static void Instantiate(CloverField& Clover, CloverField& CloverInv, RealD csw_t, RealD diag_mass) {
GridBase* grid = Clover.Grid();
CloverField ExpClover(grid);
int NMAX = getNMAX(Clover, 3.*csw_t/diag_mass);
Clover *= (1.0/diag_mass);
// Taylor expansion, slow but generic
// Horner scheme: a0 + a1 x + a2 x^2 + .. = a0 + x (a1 + x(...))
// qN = cN
// qn = cn + qn+1 X
std::vector<RealD> cn(NMAX+1);
cn[0] = 1.0;
for (int i=1; i<=NMAX; i++)
cn[i] = cn[i-1] / RealD(i);
ExpClover = Zero();
IdentityTimesC(ExpClover, cn[NMAX]);
for (int i=NMAX-1; i>=0; i--)
ExpClover = ExpClover * Clover + cn[i];
// prepare inverse
CloverInv = (-1.0)*Clover;
Clover = ExpClover * diag_mass;
ExpClover = Zero();
IdentityTimesC(ExpClover, cn[NMAX]);
for (int i=NMAX-1; i>=0; i--)
ExpClover = ExpClover * CloverInv + cn[i];
CloverInv = ExpClover * (1.0/diag_mass);
}
static GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu) {
assert(0);
return lambda;
}
};
//////////////////////////////////
// Compact Standard Clover
//////////////////////////////////
template<class Impl>
class CompactCloverHelpers: public CompactWilsonCloverHelpers<Impl>,
public WilsonCloverHelpers<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
INHERIT_CLOVER_TYPES(Impl);
INHERIT_COMPACT_CLOVER_TYPES(Impl);
typedef WilsonCloverHelpers<Impl> Helpers;
typedef CompactWilsonCloverHelpers<Impl> CompactHelpers;
static void InstantiateClover(CloverField& Clover, CloverField& CloverInv, RealD csw_t, RealD diag_mass) {
Clover += diag_mass;
}
static void InvertClover(CloverField& InvClover,
const CloverDiagonalField& diagonal,
const CloverTriangleField& triangle,
CloverDiagonalField& diagonalInv,
CloverTriangleField& triangleInv,
bool fixedBoundaries) {
CompactHelpers::Invert(diagonal, triangle, diagonalInv, triangleInv);
}
// TODO: implement Cmunu for better performances with compact layout, but don't do it
// here, but rather in WilsonCloverHelpers.h -> CompactWilsonCloverHelpers
static GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu) {
return Helpers::Cmunu(U, lambda, mu, nu);
}
};
//////////////////////////////////
// Compact Exp Clover
//////////////////////////////////
template<class Impl>
class CompactExpCloverHelpers: public CompactWilsonCloverHelpers<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
INHERIT_CLOVER_TYPES(Impl);
INHERIT_COMPACT_CLOVER_TYPES(Impl);
template <typename vtype> using iImplClover = iScalar<iMatrix<iMatrix<vtype, Impl::Dimension>, Ns>>;
typedef CompactWilsonCloverHelpers<Impl> CompactHelpers;
// Can this be avoided?
static void IdentityTimesC(const CloverField& in, RealD c) {
int DimRep = Impl::Dimension;
autoView(in_v, in, AcceleratorWrite);
accelerator_for(ss, in.Grid()->oSites(), 1, {
for (int sa=0; sa<Ns; sa++)
for (int ca=0; ca<DimRep; ca++)
in_v[ss]()(sa,sa)(ca,ca) = c;
});
}
static int getNMAX(RealD prec, RealD R) {
/* compute stop condition for exponential */
int NMAX=1;
RealD cond=R*R/2.;
while (cond*std::exp(R)>prec) {
NMAX++;
cond*=R/(double)(NMAX+1);
}
return NMAX;
}
static int getNMAX(Lattice<iImplClover<vComplexD>> &t, RealD R) {return getNMAX(1e-12,R);}
static int getNMAX(Lattice<iImplClover<vComplexF>> &t, RealD R) {return getNMAX(1e-6,R);}
static void InstantiateClover(CloverField& Clover, CloverField& CloverInv, RealD csw_t, RealD diag_mass) {
GridBase* grid = Clover.Grid();
CloverField ExpClover(grid);
int NMAX = getNMAX(Clover, 3.*csw_t/diag_mass);
Clover *= (1.0/diag_mass);
// Taylor expansion, slow but generic
// Horner scheme: a0 + a1 x + a2 x^2 + .. = a0 + x (a1 + x(...))
// qN = cN
// qn = cn + qn+1 X
std::vector<RealD> cn(NMAX+1);
cn[0] = 1.0;
for (int i=1; i<=NMAX; i++)
cn[i] = cn[i-1] / RealD(i);
ExpClover = Zero();
IdentityTimesC(ExpClover, cn[NMAX]);
for (int i=NMAX-1; i>=0; i--)
ExpClover = ExpClover * Clover + cn[i];
// prepare inverse
CloverInv = (-1.0)*Clover;
Clover = ExpClover * diag_mass;
ExpClover = Zero();
IdentityTimesC(ExpClover, cn[NMAX]);
for (int i=NMAX-1; i>=0; i--)
ExpClover = ExpClover * CloverInv + cn[i];
CloverInv = ExpClover * (1.0/diag_mass);
}
static void InvertClover(CloverField& InvClover,
const CloverDiagonalField& diagonal,
const CloverTriangleField& triangle,
CloverDiagonalField& diagonalInv,
CloverTriangleField& triangleInv,
bool fixedBoundaries) {
if (fixedBoundaries)
{
CompactHelpers::Invert(diagonal, triangle, diagonalInv, triangleInv);
}
else
{
CompactHelpers::ConvertLayout(InvClover, diagonalInv, triangleInv);
}
}
static GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu) {
assert(0);
return lambda;
}
};
NAMESPACE_END(Grid);

View File

@ -31,6 +31,7 @@
#include <Grid/qcd/action/fermion/WilsonCloverTypes.h>
#include <Grid/qcd/action/fermion/WilsonCloverHelpers.h>
#include <Grid/qcd/action/fermion/CloverHelpers.h>
NAMESPACE_BEGIN(Grid);
@ -85,7 +86,7 @@ NAMESPACE_BEGIN(Grid);
// + (2 * 1 + 4 * 1/2) triangle parts = 4 triangle parts = 60 complex words per site
// = 84 complex words per site
template<class Impl>
template<class Impl, class CloverHelpers>
class CompactWilsonCloverFermion : public WilsonFermion<Impl>,
public WilsonCloverHelpers<Impl>,
public CompactWilsonCloverHelpers<Impl> {
@ -224,7 +225,7 @@ public:
RealD csw_t;
RealD cF;
bool open_boundaries;
bool fixedBoundaries;
CloverDiagonalField Diagonal, DiagonalEven, DiagonalOdd;
CloverDiagonalField DiagonalInv, DiagonalInvEven, DiagonalInvOdd;

View File

@ -138,38 +138,52 @@ typedef WilsonTMFermion<WilsonImplF> WilsonTMFermionF;
typedef WilsonTMFermion<WilsonImplD> WilsonTMFermionD;
// Clover fermions
typedef WilsonCloverFermion<WilsonImplR> WilsonCloverFermionR;
typedef WilsonCloverFermion<WilsonImplF> WilsonCloverFermionF;
typedef WilsonCloverFermion<WilsonImplD> WilsonCloverFermionD;
template <typename WImpl> using WilsonClover = WilsonCloverFermion<WImpl, CloverHelpers<WImpl>>;
template <typename WImpl> using WilsonExpClover = WilsonCloverFermion<WImpl, ExpCloverHelpers<WImpl>>;
typedef WilsonCloverFermion<WilsonAdjImplR> WilsonCloverAdjFermionR;
typedef WilsonCloverFermion<WilsonAdjImplF> WilsonCloverAdjFermionF;
typedef WilsonCloverFermion<WilsonAdjImplD> WilsonCloverAdjFermionD;
typedef WilsonClover<WilsonImplR> WilsonCloverFermionR;
typedef WilsonClover<WilsonImplF> WilsonCloverFermionF;
typedef WilsonClover<WilsonImplD> WilsonCloverFermionD;
typedef WilsonCloverFermion<WilsonTwoIndexSymmetricImplR> WilsonCloverTwoIndexSymmetricFermionR;
typedef WilsonCloverFermion<WilsonTwoIndexSymmetricImplF> WilsonCloverTwoIndexSymmetricFermionF;
typedef WilsonCloverFermion<WilsonTwoIndexSymmetricImplD> WilsonCloverTwoIndexSymmetricFermionD;
typedef WilsonExpClover<WilsonImplR> WilsonExpCloverFermionR;
typedef WilsonExpClover<WilsonImplF> WilsonExpCloverFermionF;
typedef WilsonExpClover<WilsonImplD> WilsonExpCloverFermionD;
typedef WilsonCloverFermion<WilsonTwoIndexAntiSymmetricImplR> WilsonCloverTwoIndexAntiSymmetricFermionR;
typedef WilsonCloverFermion<WilsonTwoIndexAntiSymmetricImplF> WilsonCloverTwoIndexAntiSymmetricFermionF;
typedef WilsonCloverFermion<WilsonTwoIndexAntiSymmetricImplD> WilsonCloverTwoIndexAntiSymmetricFermionD;
typedef WilsonClover<WilsonAdjImplR> WilsonCloverAdjFermionR;
typedef WilsonClover<WilsonAdjImplF> WilsonCloverAdjFermionF;
typedef WilsonClover<WilsonAdjImplD> WilsonCloverAdjFermionD;
typedef WilsonClover<WilsonTwoIndexSymmetricImplR> WilsonCloverTwoIndexSymmetricFermionR;
typedef WilsonClover<WilsonTwoIndexSymmetricImplF> WilsonCloverTwoIndexSymmetricFermionF;
typedef WilsonClover<WilsonTwoIndexSymmetricImplD> WilsonCloverTwoIndexSymmetricFermionD;
typedef WilsonClover<WilsonTwoIndexAntiSymmetricImplR> WilsonCloverTwoIndexAntiSymmetricFermionR;
typedef WilsonClover<WilsonTwoIndexAntiSymmetricImplF> WilsonCloverTwoIndexAntiSymmetricFermionF;
typedef WilsonClover<WilsonTwoIndexAntiSymmetricImplD> WilsonCloverTwoIndexAntiSymmetricFermionD;
// Compact Clover fermions
typedef CompactWilsonCloverFermion<WilsonImplR> CompactWilsonCloverFermionR;
typedef CompactWilsonCloverFermion<WilsonImplF> CompactWilsonCloverFermionF;
typedef CompactWilsonCloverFermion<WilsonImplD> CompactWilsonCloverFermionD;
template <typename WImpl> using CompactWilsonClover = CompactWilsonCloverFermion<WImpl, CompactCloverHelpers<WImpl>>;
template <typename WImpl> using CompactWilsonExpClover = CompactWilsonCloverFermion<WImpl, CompactExpCloverHelpers<WImpl>>;
typedef CompactWilsonCloverFermion<WilsonAdjImplR> CompactWilsonCloverAdjFermionR;
typedef CompactWilsonCloverFermion<WilsonAdjImplF> CompactWilsonCloverAdjFermionF;
typedef CompactWilsonCloverFermion<WilsonAdjImplD> CompactWilsonCloverAdjFermionD;
typedef CompactWilsonClover<WilsonImplR> CompactWilsonCloverFermionR;
typedef CompactWilsonClover<WilsonImplF> CompactWilsonCloverFermionF;
typedef CompactWilsonClover<WilsonImplD> CompactWilsonCloverFermionD;
typedef CompactWilsonCloverFermion<WilsonTwoIndexSymmetricImplR> CompactWilsonCloverTwoIndexSymmetricFermionR;
typedef CompactWilsonCloverFermion<WilsonTwoIndexSymmetricImplF> CompactWilsonCloverTwoIndexSymmetricFermionF;
typedef CompactWilsonCloverFermion<WilsonTwoIndexSymmetricImplD> CompactWilsonCloverTwoIndexSymmetricFermionD;
typedef CompactWilsonExpClover<WilsonImplR> CompactWilsonExpCloverFermionR;
typedef CompactWilsonExpClover<WilsonImplF> CompactWilsonExpCloverFermionF;
typedef CompactWilsonExpClover<WilsonImplD> CompactWilsonExpCloverFermionD;
typedef CompactWilsonCloverFermion<WilsonTwoIndexAntiSymmetricImplR> CompactWilsonCloverTwoIndexAntiSymmetricFermionR;
typedef CompactWilsonCloverFermion<WilsonTwoIndexAntiSymmetricImplF> CompactWilsonCloverTwoIndexAntiSymmetricFermionF;
typedef CompactWilsonCloverFermion<WilsonTwoIndexAntiSymmetricImplD> CompactWilsonCloverTwoIndexAntiSymmetricFermionD;
typedef CompactWilsonClover<WilsonAdjImplR> CompactWilsonCloverAdjFermionR;
typedef CompactWilsonClover<WilsonAdjImplF> CompactWilsonCloverAdjFermionF;
typedef CompactWilsonClover<WilsonAdjImplD> CompactWilsonCloverAdjFermionD;
typedef CompactWilsonClover<WilsonTwoIndexSymmetricImplR> CompactWilsonCloverTwoIndexSymmetricFermionR;
typedef CompactWilsonClover<WilsonTwoIndexSymmetricImplF> CompactWilsonCloverTwoIndexSymmetricFermionF;
typedef CompactWilsonClover<WilsonTwoIndexSymmetricImplD> CompactWilsonCloverTwoIndexSymmetricFermionD;
typedef CompactWilsonClover<WilsonTwoIndexAntiSymmetricImplR> CompactWilsonCloverTwoIndexAntiSymmetricFermionR;
typedef CompactWilsonClover<WilsonTwoIndexAntiSymmetricImplF> CompactWilsonCloverTwoIndexAntiSymmetricFermionF;
typedef CompactWilsonClover<WilsonTwoIndexAntiSymmetricImplD> CompactWilsonCloverTwoIndexAntiSymmetricFermionD;
// Domain Wall fermions
typedef DomainWallFermion<WilsonImplR> DomainWallFermionR;

View File

@ -30,18 +30,6 @@ directory
NAMESPACE_BEGIN(Grid);
/*
Policy implementation for G-parity boundary conditions
Rather than treating the gauge field as a flavored field, the Grid implementation of G-parity treats the gauge field as a regular
field with complex conjugate boundary conditions. In order to ensure the second flavor interacts with the conjugate links and the first
with the regular links we overload the functionality of doubleStore, whose purpose is to store the gauge field and the barrel-shifted gauge field
to avoid communicating links when applying the Dirac operator, such that the double-stored field contains also a flavor index which maps to
either the link or the conjugate link. This flavored field is then used by multLink to apply the correct link to a spinor.
Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.
mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
*/
template <class S, class Representation = FundamentalRepresentation, class Options=CoeffReal>
class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Representation::Dimension> > {
public:
@ -125,7 +113,7 @@ public:
|| ((distance== 1)&&(icoor[direction]==1))
|| ((distance==-1)&&(icoor[direction]==0));
permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu] && mmu < Nd-1; //only if we are going around the world in a spatial direction
permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu]; //only if we are going around the world
//Apply the links
int f_upper = permute_lane ? 1 : 0;
@ -151,10 +139,10 @@ public:
assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code
assert((sl == 1) || (sl == 2));
//If this site is an global boundary site, perform the G-parity flavor twist
if ( mmu < Nd-1 && SE->_around_the_world && St.parameters.twists[mmu] ) {
if ( SE->_around_the_world && St.parameters.twists[mmu] ) {
if ( sl == 2 ) {
//Only do the twist for lanes on the edge of the physical node
ExtractBuffer<sobj> vals(Nsimd);
extract(chi,vals);
@ -209,19 +197,6 @@ public:
reg = memory;
}
//Poke 'poke_f0' onto flavor 0 and 'poke_f1' onto flavor 1 in direction mu of the doubled gauge field Uds
inline void pokeGparityDoubledGaugeField(DoubledGaugeField &Uds, const GaugeLinkField &poke_f0, const GaugeLinkField &poke_f1, const int mu){
autoView(poke_f0_v, poke_f0, CpuRead);
autoView(poke_f1_v, poke_f1, CpuRead);
autoView(Uds_v, Uds, CpuWrite);
thread_foreach(ss,poke_f0_v,{
Uds_v[ss](0)(mu) = poke_f0_v[ss]();
Uds_v[ss](1)(mu) = poke_f1_v[ss]();
});
}
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
{
conformable(Uds.Grid(),GaugeGrid);
@ -232,19 +207,14 @@ public:
GaugeLinkField Uconj(GaugeGrid);
Lattice<iScalar<vInteger> > coor(GaugeGrid);
//Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.
//mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
for(int mu=0;mu<Nd-1;mu++){
if( Params.twists[mu] ){
LatticeCoordinate(coor,mu);
}
for(int mu=0;mu<Nd;mu++){
LatticeCoordinate(coor,mu);
U = PeekIndex<LorentzIndex>(Umu,mu);
Uconj = conjugate(U);
// Implement the isospin rotation sign on the boundary between f=1 and f=0
// This phase could come from a simple bc 1,1,-1,1 ..
int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
if ( Params.twists[mu] ) {
@ -259,7 +229,7 @@ public:
thread_foreach(ss,U_v,{
Uds_v[ss](0)(mu) = U_v[ss]();
Uds_v[ss](1)(mu) = Uconj_v[ss]();
});
});
}
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
@ -290,38 +260,6 @@ public:
});
}
}
{ //periodic / antiperiodic temporal BCs
int mu = Nd-1;
int L = GaugeGrid->GlobalDimensions()[mu];
int Lmu = L - 1;
LatticeCoordinate(coor, mu);
U = PeekIndex<LorentzIndex>(Umu, mu); //Get t-directed links
GaugeLinkField *Upoke = &U;
if(Params.twists[mu]){ //antiperiodic
Utmp = where(coor == Lmu, -U, U);
Upoke = &Utmp;
}
Uconj = conjugate(*Upoke); //second flavor interacts with conjugate links
pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu);
//Get the barrel-shifted field
Utmp = adj(Cshift(U, mu, -1)); //is a forward shift!
Upoke = &Utmp;
if(Params.twists[mu]){
U = where(coor == 0, -Utmp, Utmp); //boundary phase
Upoke = &U;
}
Uconj = conjugate(*Upoke);
pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu + 4);
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) {
@ -360,48 +298,28 @@ public:
inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds){
assert(0);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
int Ls=Btilde.Grid()->_fdimensions[0];
{
GridBase *GaugeGrid = mat.Grid();
Lattice<iScalar<vInteger> > coor(GaugeGrid);
if( Params.twists[mu] ){
LatticeCoordinate(coor,mu);
}
autoView( mat_v , mat, AcceleratorWrite);
autoView( Btilde_v , Btilde, AcceleratorRead);
autoView( Atilde_v , Atilde, AcceleratorRead);
accelerator_for(sss,mat.Grid()->oSites(), FermionField::vector_type::Nsimd(),{
int sU=sss;
typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType;
ColorMatrixType sum;
zeroit(sum);
for(int s=0;s<Ls;s++){
int sF = s+Ls*sU;
for(int spn=0;spn<Ns;spn++){ //sum over spin
//Flavor 0
auto bb = coalescedRead(Btilde_v[sF](0)(spn) ); //color vector
auto aa = coalescedRead(Atilde_v[sF](0)(spn) );
sum = sum + outerProduct(bb,aa);
//Flavor 1
bb = coalescedRead(Btilde_v[sF](1)(spn) );
aa = coalescedRead(Atilde_v[sF](1)(spn) );
sum = sum + conjugate(outerProduct(bb,aa));
}
}
coalescedWrite(mat_v[sU](mu)(), sum);
});
}
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
int Ls = Btilde.Grid()->_fdimensions[0];
GaugeLinkField tmp(mat.Grid());
tmp = Zero();
{
autoView( tmp_v , tmp, CpuWrite);
autoView( Atilde_v , Atilde, CpuRead);
autoView( Btilde_v , Btilde, CpuRead);
thread_for(ss,tmp.Grid()->oSites(),{
for (int s = 0; s < Ls; s++) {
int sF = s + Ls * ss;
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde_v[sF], Atilde_v[sF]));
tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
}
});
}
PokeIndex<LorentzIndex>(mat, tmp, mu);
return;
}
};

View File

@ -32,6 +32,7 @@
#include <Grid/qcd/action/fermion/WilsonCloverTypes.h>
#include <Grid/qcd/action/fermion/WilsonCloverHelpers.h>
#include <Grid/qcd/action/fermion/CloverHelpers.h>
NAMESPACE_BEGIN(Grid);
@ -51,7 +52,7 @@ NAMESPACE_BEGIN(Grid);
// csw_r = csw_t to recover the isotropic version
//////////////////////////////////////////////////////////////////
template <class Impl>
template<class Impl, class CloverHelpers>
class WilsonCloverFermion : public WilsonFermion<Impl>,
public WilsonCloverHelpers<Impl>
{

View File

@ -209,6 +209,8 @@ public:
};
////////////////////////////////////////////////////////
template<class Impl> class CompactWilsonCloverHelpers {
public:

View File

@ -47,8 +47,6 @@ class CompactWilsonCloverTypes {
public:
INHERIT_IMPL_TYPES(Impl);
static_assert(Nd == 4 && Nc == 3 && Ns == 4 && Impl::Dimension == 3, "Wrong dimensions");
static constexpr int Nred = Nc * Nhs; // 6
static constexpr int Nblock = Nhs; // 2
static constexpr int Ndiagonal = Nred; // 6

View File

@ -117,19 +117,19 @@ public:
typedef decltype(coalescedRead(*in)) sobj;
typedef decltype(coalescedRead(*out0)) hsobj;
unsigned int Nsimd = vobj::Nsimd();
constexpr unsigned int Nsimd = vobj::Nsimd();
unsigned int mask = Nsimd >> (type + 1);
int lane = acceleratorSIMTlane(Nsimd);
int j0 = lane &(~mask); // inner coor zero
int j1 = lane |(mask) ; // inner coor one
const vobj *vp0 = &in[k];
const vobj *vp1 = &in[m];
const vobj *vp = (lane&mask) ? vp1:vp0;
auto sa = coalescedRead(*vp,j0);
auto sb = coalescedRead(*vp,j1);
const vobj *vp0 = &in[k]; // out0[j] = merge low bit of type from in[k] and in[m]
const vobj *vp1 = &in[m]; // out1[j] = merge hi bit of type from in[k] and in[m]
const vobj *vp = (lane&mask) ? vp1:vp0;// if my lane has high bit take vp1, low bit take vp0
auto sa = coalescedRead(*vp,j0); // lane to read for out 0, NB 50% read coalescing
auto sb = coalescedRead(*vp,j1); // lane to read for out 1
hsobj psa, psb;
projector::Proj(psa,sa,mu,dag);
projector::Proj(psb,sb,mu,dag);
projector::Proj(psa,sa,mu,dag); // spin project the result0
projector::Proj(psb,sb,mu,dag); // spin project the result1
coalescedWrite(out0[j],psa);
coalescedWrite(out1[j],psb);
#else

View File

@ -47,7 +47,7 @@ CayleyFermion5D<Impl>::CayleyFermion5D(GaugeField &_Umu,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_M5,p),
mass(_mass)
mass_plus(_mass), mass_minus(_mass)
{
}
@ -209,8 +209,8 @@ void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
Vector<Coeff_t> diag (Ls,1.0);
Vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass;
Vector<Coeff_t> lower(Ls,-1.0); lower[0] =mass;
Vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass_minus;
Vector<Coeff_t> lower(Ls,-1.0); lower[0] =mass_plus;
M5D(psi,chi,chi,lower,diag,upper);
}
template<class Impl>
@ -220,8 +220,8 @@ void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &D
Vector<Coeff_t> diag = bs;
Vector<Coeff_t> upper= cs;
Vector<Coeff_t> lower= cs;
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
upper[Ls-1]=-mass_minus*upper[Ls-1];
lower[0] =-mass_plus*lower[0];
M5D(psi,psi,Din,lower,diag,upper);
}
// FIXME Redunant with the above routine; check this and eliminate
@ -235,8 +235,8 @@ template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &
upper[i]=-ceo[i];
lower[i]=-ceo[i];
}
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
upper[Ls-1]=-mass_minus*upper[Ls-1];
lower[0] =-mass_plus*lower[0];
M5D(psi,psi,chi,lower,diag,upper);
}
template<class Impl>
@ -250,8 +250,8 @@ void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &
upper[i]=-cee[i];
lower[i]=-cee[i];
}
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
upper[Ls-1]=-mass_minus*upper[Ls-1];
lower[0] =-mass_plus*lower[0];
M5D(psi,psi,chi,lower,diag,upper);
}
template<class Impl>
@ -266,9 +266,9 @@ void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &
// Assemble the 5d matrix
if ( s==0 ) {
upper[s] = -cee[s+1] ;
lower[s] = mass*cee[Ls-1];
lower[s] = mass_minus*cee[Ls-1];
} else if ( s==(Ls-1)) {
upper[s] = mass*cee[0];
upper[s] = mass_plus*cee[0];
lower[s] = -cee[s-1];
} else {
upper[s]=-cee[s+1];
@ -291,8 +291,8 @@ void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi)
Vector<Coeff_t> diag(Ls,1.0);
Vector<Coeff_t> upper(Ls,-1.0);
Vector<Coeff_t> lower(Ls,-1.0);
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
upper[Ls-1]=-mass_plus*upper[Ls-1];
lower[0] =-mass_minus*lower[0];
M5Ddag(psi,chi,chi,lower,diag,upper);
}
@ -307,9 +307,9 @@ void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField
for (int s=0;s<Ls;s++){
if ( s== 0 ) {
upper[s] = cs[s+1];
lower[s] =-mass*cs[Ls-1];
lower[s] =-mass_minus*cs[Ls-1];
} else if ( s==(Ls-1) ) {
upper[s] =-mass*cs[0];
upper[s] =-mass_plus*cs[0];
lower[s] = cs[s-1];
} else {
upper[s] = cs[s+1];
@ -552,7 +552,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,Vector<Coeff_t
lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column
leem[i]=mass*cee[Ls-1]/bee[0];
leem[i]=mass_minus*cee[Ls-1]/bee[0];
for(int j=0;j<i;j++) {
assert(bee[j+1]!=Coeff_t(0.0));
leem[i]*= aee[j]/bee[j+1];
@ -560,7 +560,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,Vector<Coeff_t
uee[i] =-aee[i]/bee[i]; // up-diag entry on the ith row
ueem[i]=mass;
ueem[i]=mass_plus;
for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j];
ueem[i]*= aee[0]/bee[0];
@ -573,7 +573,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,Vector<Coeff_t
}
{
Coeff_t delta_d=mass*cee[Ls-1];
Coeff_t delta_d=mass_minus*cee[Ls-1];
for(int j=0;j<Ls-1;j++) {
assert(bee[j] != Coeff_t(0.0));
delta_d *= cee[j]/bee[j];
@ -642,6 +642,10 @@ void CayleyFermion5D<Impl>::ContractConservedCurrent( PropagatorField &q_in_1,
Current curr_type,
unsigned int mu)
{
assert(mass_plus == mass_minus);
RealD mass = mass_plus;
#if (!defined(GRID_HIP))
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
@ -777,6 +781,8 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
assert(mu>=0);
assert(mu<Nd);
assert(mass_plus == mass_minus);
RealD mass = mass_plus;
#if 0
int tshift = (mu == Nd-1) ? 1 : 0;

View File

@ -32,22 +32,23 @@
#include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/action/fermion/CompactWilsonCloverFermion.h>
NAMESPACE_BEGIN(Grid);
template<class Impl>
CompactWilsonCloverFermion<Impl>::CompactWilsonCloverFermion(GaugeField& _Umu,
GridCartesian& Fgrid,
GridRedBlackCartesian& Hgrid,
const RealD _mass,
const RealD _csw_r,
const RealD _csw_t,
const RealD _cF,
const WilsonAnisotropyCoefficients& clover_anisotropy,
const ImplParams& impl_p)
template<class Impl, class CloverHelpers>
CompactWilsonCloverFermion<Impl, CloverHelpers>::CompactWilsonCloverFermion(GaugeField& _Umu,
GridCartesian& Fgrid,
GridRedBlackCartesian& Hgrid,
const RealD _mass,
const RealD _csw_r,
const RealD _csw_t,
const RealD _cF,
const WilsonAnisotropyCoefficients& clover_anisotropy,
const ImplParams& impl_p)
: WilsonBase(_Umu, Fgrid, Hgrid, _mass, impl_p, clover_anisotropy)
, csw_r(_csw_r)
, csw_t(_csw_t)
, cF(_cF)
, open_boundaries(impl_p.boundary_phases[Nd-1] == 0.0)
, fixedBoundaries(impl_p.boundary_phases[Nd-1] == 0.0)
, Diagonal(&Fgrid), Triangle(&Fgrid)
, DiagonalEven(&Hgrid), TriangleEven(&Hgrid)
, DiagonalOdd(&Hgrid), TriangleOdd(&Hgrid)
@ -58,80 +59,85 @@ CompactWilsonCloverFermion<Impl>::CompactWilsonCloverFermion(GaugeField& _Umu,
, BoundaryMask(&Fgrid)
, BoundaryMaskEven(&Hgrid), BoundaryMaskOdd(&Hgrid)
{
assert(Nd == 4 && Nc == 3 && Ns == 4 && Impl::Dimension == 3);
csw_r *= 0.5;
csw_t *= 0.5;
if (clover_anisotropy.isAnisotropic)
csw_r /= clover_anisotropy.xi_0;
ImportGauge(_Umu);
if (open_boundaries)
if (fixedBoundaries) {
this->BoundaryMaskEven.Checkerboard() = Even;
this->BoundaryMaskOdd.Checkerboard() = Odd;
CompactHelpers::SetupMasks(this->BoundaryMask, this->BoundaryMaskEven, this->BoundaryMaskOdd);
}
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::Dhop(const FermionField& in, FermionField& out, int dag) {
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::Dhop(const FermionField& in, FermionField& out, int dag) {
WilsonBase::Dhop(in, out, dag);
if(open_boundaries) ApplyBoundaryMask(out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::DhopOE(const FermionField& in, FermionField& out, int dag) {
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::DhopOE(const FermionField& in, FermionField& out, int dag) {
WilsonBase::DhopOE(in, out, dag);
if(open_boundaries) ApplyBoundaryMask(out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::DhopEO(const FermionField& in, FermionField& out, int dag) {
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::DhopEO(const FermionField& in, FermionField& out, int dag) {
WilsonBase::DhopEO(in, out, dag);
if(open_boundaries) ApplyBoundaryMask(out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::DhopDir(const FermionField& in, FermionField& out, int dir, int disp) {
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::DhopDir(const FermionField& in, FermionField& out, int dir, int disp) {
WilsonBase::DhopDir(in, out, dir, disp);
if(this->open_boundaries) ApplyBoundaryMask(out);
if(this->fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::DhopDirAll(const FermionField& in, std::vector<FermionField>& out) {
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::DhopDirAll(const FermionField& in, std::vector<FermionField>& out) {
WilsonBase::DhopDirAll(in, out);
if(this->open_boundaries) {
if(this->fixedBoundaries) {
for(auto& o : out) ApplyBoundaryMask(o);
}
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::M(const FermionField& in, FermionField& out) {
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::M(const FermionField& in, FermionField& out) {
out.Checkerboard() = in.Checkerboard();
WilsonBase::Dhop(in, out, DaggerNo); // call base to save applying bc
Mooee(in, Tmp);
axpy(out, 1.0, out, Tmp);
if(open_boundaries) ApplyBoundaryMask(out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::Mdag(const FermionField& in, FermionField& out) {
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::Mdag(const FermionField& in, FermionField& out) {
out.Checkerboard() = in.Checkerboard();
WilsonBase::Dhop(in, out, DaggerYes); // call base to save applying bc
MooeeDag(in, Tmp);
axpy(out, 1.0, out, Tmp);
if(open_boundaries) ApplyBoundaryMask(out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::Meooe(const FermionField& in, FermionField& out) {
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::Meooe(const FermionField& in, FermionField& out) {
WilsonBase::Meooe(in, out);
if(open_boundaries) ApplyBoundaryMask(out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::MeooeDag(const FermionField& in, FermionField& out) {
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::MeooeDag(const FermionField& in, FermionField& out) {
WilsonBase::MeooeDag(in, out);
if(open_boundaries) ApplyBoundaryMask(out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::Mooee(const FermionField& in, FermionField& out) {
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::Mooee(const FermionField& in, FermionField& out) {
if(in.Grid()->_isCheckerBoarded) {
if(in.Checkerboard() == Odd) {
MooeeInternal(in, out, DiagonalOdd, TriangleOdd);
@ -141,16 +147,16 @@ void CompactWilsonCloverFermion<Impl>::Mooee(const FermionField& in, FermionFiel
} else {
MooeeInternal(in, out, Diagonal, Triangle);
}
if(open_boundaries) ApplyBoundaryMask(out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::MooeeDag(const FermionField& in, FermionField& out) {
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::MooeeDag(const FermionField& in, FermionField& out) {
Mooee(in, out); // blocks are hermitian
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::MooeeInv(const FermionField& in, FermionField& out) {
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::MooeeInv(const FermionField& in, FermionField& out) {
if(in.Grid()->_isCheckerBoarded) {
if(in.Checkerboard() == Odd) {
MooeeInternal(in, out, DiagonalInvOdd, TriangleInvOdd);
@ -160,27 +166,27 @@ void CompactWilsonCloverFermion<Impl>::MooeeInv(const FermionField& in, FermionF
} else {
MooeeInternal(in, out, DiagonalInv, TriangleInv);
}
if(open_boundaries) ApplyBoundaryMask(out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::MooeeInvDag(const FermionField& in, FermionField& out) {
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::MooeeInvDag(const FermionField& in, FermionField& out) {
MooeeInv(in, out); // blocks are hermitian
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::Mdir(const FermionField& in, FermionField& out, int dir, int disp) {
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::Mdir(const FermionField& in, FermionField& out, int dir, int disp) {
DhopDir(in, out, dir, disp);
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::MdirAll(const FermionField& in, std::vector<FermionField>& out) {
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::MdirAll(const FermionField& in, std::vector<FermionField>& out) {
DhopDirAll(in, out);
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) {
assert(!open_boundaries); // TODO check for changes required for open bc
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) {
assert(!fixedBoundaries); // TODO check for changes required for open bc
// NOTE: code copied from original clover term
conformable(X.Grid(), Y.Grid());
@ -251,7 +257,7 @@ void CompactWilsonCloverFermion<Impl>::MDeriv(GaugeField& force, const FermionFi
}
PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked
Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok
force_mu -= factor*Helpers::Cmunu(U, lambda, mu, nu); // checked
force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu); // checked
count++;
}
@ -261,18 +267,18 @@ void CompactWilsonCloverFermion<Impl>::MDeriv(GaugeField& force, const FermionFi
force += clover_force;
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) {
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) {
assert(0);
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) {
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) {
assert(0);
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::MooeeInternal(const FermionField& in,
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::MooeeInternal(const FermionField& in,
FermionField& out,
const CloverDiagonalField& diagonal,
const CloverTriangleField& triangle) {
@ -285,8 +291,8 @@ void CompactWilsonCloverFermion<Impl>::MooeeInternal(const FermionField&
CompactHelpers::MooeeKernel(diagonal.oSites(), 1, in, out, diagonal, triangle);
}
template<class Impl>
void CompactWilsonCloverFermion<Impl>::ImportGauge(const GaugeField& _Umu) {
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::ImportGauge(const GaugeField& _Umu) {
// NOTE: parts copied from original implementation
// Import gauge into base class
@ -299,6 +305,7 @@ void CompactWilsonCloverFermion<Impl>::ImportGauge(const GaugeField& _Umu) {
GridBase* grid = _Umu.Grid();
typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid);
CloverField TmpOriginal(grid);
CloverField TmpInverse(grid);
// Compute the field strength terms mu>nu
double t2 = usecond();
@ -318,22 +325,30 @@ void CompactWilsonCloverFermion<Impl>::ImportGauge(const GaugeField& _Umu) {
TmpOriginal += Helpers::fillCloverXT(Ex) * csw_t;
TmpOriginal += Helpers::fillCloverYT(Ey) * csw_t;
TmpOriginal += Helpers::fillCloverZT(Ez) * csw_t;
TmpOriginal += this->diag_mass;
// Instantiate the clover term
// - In case of the standard clover the mass term is added
// - In case of the exponential clover the clover term is exponentiated
double t4 = usecond();
CloverHelpers::InstantiateClover(TmpOriginal, TmpInverse, csw_t, this->diag_mass);
// Convert the data layout of the clover term
double t4 = usecond();
double t5 = usecond();
CompactHelpers::ConvertLayout(TmpOriginal, Diagonal, Triangle);
// Possible modify the boundary values
double t5 = usecond();
if(open_boundaries) CompactHelpers::ModifyBoundaries(Diagonal, Triangle, csw_t, cF, this->diag_mass);
// Invert the clover term in the improved layout
// Modify the clover term at the temporal boundaries in case of open boundary conditions
double t6 = usecond();
CompactHelpers::Invert(Diagonal, Triangle, DiagonalInv, TriangleInv);
if(fixedBoundaries) CompactHelpers::ModifyBoundaries(Diagonal, Triangle, csw_t, cF, this->diag_mass);
// Invert the Clover term
// In case of the exponential clover with (anti-)periodic boundary conditions exp(-Clover) saved
// in TmpInverse can be used. In all other cases the clover term has to be explictly inverted.
// TODO: For now this inversion is explictly done on the CPU
double t7 = usecond();
CloverHelpers::InvertClover(TmpInverse, Diagonal, Triangle, DiagonalInv, TriangleInv, fixedBoundaries);
// Fill the remaining clover fields
double t7 = usecond();
double t8 = usecond();
pickCheckerboard(Even, DiagonalEven, Diagonal);
pickCheckerboard(Even, TriangleEven, Triangle);
pickCheckerboard(Odd, DiagonalOdd, Diagonal);
@ -344,20 +359,19 @@ void CompactWilsonCloverFermion<Impl>::ImportGauge(const GaugeField& _Umu) {
pickCheckerboard(Odd, TriangleInvOdd, TriangleInv);
// Report timings
double t8 = usecond();
#if 0
std::cout << GridLogMessage << "CompactWilsonCloverFermion::ImportGauge timings:"
<< " WilsonFermion::Importgauge = " << (t1 - t0) / 1e6
<< ", allocations = " << (t2 - t1) / 1e6
<< ", field strength = " << (t3 - t2) / 1e6
<< ", fill clover = " << (t4 - t3) / 1e6
<< ", convert = " << (t5 - t4) / 1e6
<< ", boundaries = " << (t6 - t5) / 1e6
<< ", inversions = " << (t7 - t6) / 1e6
<< ", pick cbs = " << (t8 - t7) / 1e6
<< ", total = " << (t8 - t0) / 1e6
<< std::endl;
#endif
double t9 = usecond();
std::cout << GridLogDebug << "CompactWilsonCloverFermion::ImportGauge timings:" << std::endl;
std::cout << GridLogDebug << "WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 << std::endl;
std::cout << GridLogDebug << "allocations = " << (t2 - t1) / 1e6 << std::endl;
std::cout << GridLogDebug << "field strength = " << (t3 - t2) / 1e6 << std::endl;
std::cout << GridLogDebug << "fill clover = " << (t4 - t3) / 1e6 << std::endl;
std::cout << GridLogDebug << "instantiate clover = " << (t5 - t4) / 1e6 << std::endl;
std::cout << GridLogDebug << "convert layout = " << (t6 - t5) / 1e6 << std::endl;
std::cout << GridLogDebug << "modify boundaries = " << (t7 - t6) / 1e6 << std::endl;
std::cout << GridLogDebug << "invert clover = " << (t8 - t7) / 1e6 << std::endl;
std::cout << GridLogDebug << "pick cbs = " << (t9 - t8) / 1e6 << std::endl;
std::cout << GridLogDebug << "total = " << (t9 - t0) / 1e6 << std::endl;
}
NAMESPACE_END(Grid);

View File

@ -34,8 +34,8 @@
NAMESPACE_BEGIN(Grid);
template<class Impl>
WilsonCloverFermion<Impl>::WilsonCloverFermion(GaugeField& _Umu,
template<class Impl, class CloverHelpers>
WilsonCloverFermion<Impl, CloverHelpers>::WilsonCloverFermion(GaugeField& _Umu,
GridCartesian& Fgrid,
GridRedBlackCartesian& Hgrid,
const RealD _mass,
@ -74,8 +74,8 @@ WilsonCloverFermion<Impl>::WilsonCloverFermion(GaugeField&
}
// *NOT* EO
template <class Impl>
void WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out)
template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl, CloverHelpers>::M(const FermionField &in, FermionField &out)
{
FermionField temp(out.Grid());
@ -89,8 +89,8 @@ void WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out)
out += temp;
}
template <class Impl>
void WilsonCloverFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl, CloverHelpers>::Mdag(const FermionField &in, FermionField &out)
{
FermionField temp(out.Grid());
@ -104,8 +104,8 @@ void WilsonCloverFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
out += temp;
}
template <class Impl>
void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl, CloverHelpers>::ImportGauge(const GaugeField &_Umu)
{
double t0 = usecond();
WilsonFermion<Impl>::ImportGauge(_Umu);
@ -131,47 +131,11 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
CloverTerm += Helpers::fillCloverXT(Ex) * csw_t;
CloverTerm += Helpers::fillCloverYT(Ey) * csw_t;
CloverTerm += Helpers::fillCloverZT(Ez) * csw_t;
CloverTerm += diag_mass;
double t4 = usecond();
int lvol = _Umu.Grid()->lSites();
int DimRep = Impl::Dimension;
CloverHelpers::Instantiate(CloverTerm, CloverTermInv, csw_t, this->diag_mass);
double t5 = usecond();
{
autoView(CTv,CloverTerm,CpuRead);
autoView(CTIv,CloverTermInv,CpuWrite);
thread_for(site, lvol, {
Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor);
Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero();
peekLocalSite(Qx, CTv, lcoor);
//if (csw!=0){
for (int j = 0; j < Ns; j++)
for (int k = 0; k < Ns; k++)
for (int a = 0; a < DimRep; a++)
for (int b = 0; b < DimRep; b++){
auto zz = Qx()(j, k)(a, b);
EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex<double>(zz);
}
// if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl;
EigenInvCloverOp = EigenCloverOp.inverse();
//std::cout << EigenInvCloverOp << std::endl;
for (int j = 0; j < Ns; j++)
for (int k = 0; k < Ns; k++)
for (int a = 0; a < DimRep; a++)
for (int b = 0; b < DimRep; b++)
Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep);
// if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl;
// }
pokeLocalSite(Qxinv, CTIv, lcoor);
});
}
double t6 = usecond();
// Separate the even and odd parts
pickCheckerboard(Even, CloverTermEven, CloverTerm);
pickCheckerboard(Odd, CloverTermOdd, CloverTerm);
@ -184,48 +148,44 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv));
pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv));
double t7 = usecond();
double t6 = usecond();
#if 0
std::cout << GridLogMessage << "WilsonCloverFermion::ImportGauge timings:"
<< " WilsonFermion::Importgauge = " << (t1 - t0) / 1e6
<< ", allocations = " << (t2 - t1) / 1e6
<< ", field strength = " << (t3 - t2) / 1e6
<< ", fill clover = " << (t4 - t3) / 1e6
<< ", misc = " << (t5 - t4) / 1e6
<< ", inversions = " << (t6 - t5) / 1e6
<< ", pick cbs = " << (t7 - t6) / 1e6
<< ", total = " << (t7 - t0) / 1e6
<< std::endl;
#endif
std::cout << GridLogDebug << "WilsonCloverFermion::ImportGauge timings:" << std::endl;
std::cout << GridLogDebug << "WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 << std::endl;
std::cout << GridLogDebug << "allocations = " << (t2 - t1) / 1e6 << std::endl;
std::cout << GridLogDebug << "field strength = " << (t3 - t2) / 1e6 << std::endl;
std::cout << GridLogDebug << "fill clover = " << (t4 - t3) / 1e6 << std::endl;
std::cout << GridLogDebug << "instantiation = " << (t5 - t4) / 1e6 << std::endl;
std::cout << GridLogDebug << "pick cbs = " << (t6 - t5) / 1e6 << std::endl;
std::cout << GridLogDebug << "total = " << (t6 - t0) / 1e6 << std::endl;
}
template <class Impl>
void WilsonCloverFermion<Impl>::Mooee(const FermionField &in, FermionField &out)
template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl, CloverHelpers>::Mooee(const FermionField &in, FermionField &out)
{
this->MooeeInternal(in, out, DaggerNo, InverseNo);
}
template <class Impl>
void WilsonCloverFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out)
template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl, CloverHelpers>::MooeeDag(const FermionField &in, FermionField &out)
{
this->MooeeInternal(in, out, DaggerYes, InverseNo);
}
template <class Impl>
void WilsonCloverFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out)
template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl, CloverHelpers>::MooeeInv(const FermionField &in, FermionField &out)
{
this->MooeeInternal(in, out, DaggerNo, InverseYes);
}
template <class Impl>
void WilsonCloverFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl, CloverHelpers>::MooeeInvDag(const FermionField &in, FermionField &out)
{
this->MooeeInternal(in, out, DaggerYes, InverseYes);
}
template <class Impl>
void WilsonCloverFermion<Impl>::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv)
template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl, CloverHelpers>::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv)
{
out.Checkerboard() = in.Checkerboard();
CloverField *Clover;
@ -278,8 +238,8 @@ void WilsonCloverFermion<Impl>::MooeeInternal(const FermionField &in, FermionFie
} // MooeeInternal
// Derivative parts unpreconditioned pseudofermions
template <class Impl>
void WilsonCloverFermion<Impl>::MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag)
template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl, CloverHelpers>::MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag)
{
conformable(X.Grid(), Y.Grid());
conformable(X.Grid(), force.Grid());
@ -349,7 +309,7 @@ void WilsonCloverFermion<Impl>::MDeriv(GaugeField &force, const FermionField &X,
}
PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked
Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok
force_mu -= factor*Helpers::Cmunu(U, lambda, mu, nu); // checked
force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu); // checked
count++;
}
@ -360,15 +320,15 @@ void WilsonCloverFermion<Impl>::MDeriv(GaugeField &force, const FermionField &X,
}
// Derivative parts
template <class Impl>
void WilsonCloverFermion<Impl>::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag)
template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl, CloverHelpers>::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag)
{
assert(0);
}
// Derivative parts
template <class Impl>
void WilsonCloverFermion<Impl>::MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl, CloverHelpers>::MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
{
assert(0); // not implemented yet
}

View File

@ -4,12 +4,13 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonFermion.cc
Copyright (C) 2015
Copyright (C) 2022
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Fabian Joswig <fabian.joswig@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -599,11 +600,47 @@ void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
Current curr_type,
unsigned int mu)
{
if(curr_type != Current::Vector)
{
std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl;
exit(1);
}
Gamma g5(Gamma::Algebra::Gamma5);
conformable(_grid, q_in_1.Grid());
conformable(_grid, q_in_2.Grid());
conformable(_grid, q_out.Grid());
assert(0);
auto UGrid= this->GaugeGrid();
PropagatorField tmp_shifted(UGrid);
PropagatorField g5Lg5(UGrid);
PropagatorField R(UGrid);
PropagatorField gmuR(UGrid);
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT,
};
Gamma gmu=Gamma(Gmu[mu]);
g5Lg5=g5*q_in_1*g5;
tmp_shifted=Cshift(q_in_2,mu,1);
Impl::multLinkField(R,this->Umu,tmp_shifted,mu);
gmuR=gmu*R;
q_out=adj(g5Lg5)*R;
q_out-=adj(g5Lg5)*gmuR;
tmp_shifted=Cshift(q_in_1,mu,1);
Impl::multLinkField(g5Lg5,this->Umu,tmp_shifted,mu);
g5Lg5=g5*g5Lg5*g5;
R=q_in_2;
gmuR=gmu*R;
q_out-=adj(g5Lg5)*R;
q_out-=adj(g5Lg5)*gmuR;
}
@ -617,9 +654,51 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
unsigned int tmax,
ComplexField &lattice_cmplx)
{
if(curr_type != Current::Vector)
{
std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl;
exit(1);
}
int tshift = (mu == Nd-1) ? 1 : 0;
unsigned int LLt = GridDefaultLatt()[Tp];
conformable(_grid, q_in.Grid());
conformable(_grid, q_out.Grid());
assert(0);
auto UGrid= this->GaugeGrid();
PropagatorField tmp(UGrid);
PropagatorField Utmp(UGrid);
PropagatorField L(UGrid);
PropagatorField zz (UGrid);
zz=Zero();
LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1);
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT,
};
Gamma gmu=Gamma(Gmu[mu]);
tmp = Cshift(q_in,mu,1);
Impl::multLinkField(Utmp,this->Umu,tmp,mu);
tmp = ( Utmp*lattice_cmplx - gmu*Utmp*lattice_cmplx ); // Forward hop
tmp = where((lcoor>=tmin),tmp,zz); // Mask the time
q_out = where((lcoor<=tmax),tmp,zz); // Position of current complicated
tmp = q_in *lattice_cmplx;
tmp = Cshift(tmp,mu,-1);
Impl::multLinkField(Utmp,this->Umu,tmp,mu+Nd); // Adjoint link
tmp = -( Utmp + gmu*Utmp );
// Mask the time
if (tmax == LLt - 1 && tshift == 1){ // quick fix to include timeslice 0 if tmax + tshift is over the last timeslice
unsigned int t0 = 0;
tmp = where(((lcoor==t0) || (lcoor>=tmin+tshift)),tmp,zz);
} else {
tmp = where((lcoor>=tmin+tshift),tmp,zz);
}
q_out+= where((lcoor<=tmax+tshift),tmp,zz); // Position of current complicated
}
NAMESPACE_END(Grid);

View File

@ -498,6 +498,7 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
#ifndef GRID_CUDA
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDag); return;}
#endif
acceleratorFenceComputeStream();
} else if( interior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagInt); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagInt); return;}
@ -505,11 +506,13 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;}
#endif
} else if( exterior ) {
acceleratorFenceComputeStream();
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagExt); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt); return;}
#ifndef GRID_CUDA
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagExt); return;}
#endif
acceleratorFenceComputeStream();
}
assert(0 && " Kernel optimisation case not covered ");
}

View File

@ -9,6 +9,7 @@
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Daniel Richtmann <daniel.richtmann@gmail.com>
Author: Mattia Bruno <mattia.bruno@cern.ch>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -32,10 +33,12 @@
#include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/action/fermion/CompactWilsonCloverFermion.h>
#include <Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h>
#include <Grid/qcd/action/fermion/CloverHelpers.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class CompactWilsonCloverFermion<IMPLEMENTATION>;
template class CompactWilsonCloverFermion<IMPLEMENTATION, CompactCloverHelpers<IMPLEMENTATION>>;
template class CompactWilsonCloverFermion<IMPLEMENTATION, CompactExpCloverHelpers<IMPLEMENTATION>>;
NAMESPACE_END(Grid);

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonKernelsInstantiation.cc.master

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonKernelsInstantiation.cc.master

View File

@ -8,7 +8,8 @@
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Mattia Bruno <mattia.bruno@cern.ch>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
@ -31,10 +32,12 @@
#include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/action/fermion/WilsonCloverFermion.h>
#include <Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h>
#include <Grid/qcd/action/fermion/CloverHelpers.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonCloverFermion<IMPLEMENTATION>;
template class WilsonCloverFermion<IMPLEMENTATION, CloverHelpers<IMPLEMENTATION>>;
template class WilsonCloverFermion<IMPLEMENTATION, ExpCloverHelpers<IMPLEMENTATION>>;
NAMESPACE_END(Grid);

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonKernelsInstantiation.cc.master

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonKernelsInstantiation.cc.master

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonKernelsInstantiation.cc.master

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonKernelsInstantiation.cc.master

View File

@ -18,6 +18,10 @@ WILSON_IMPL_LIST=" \
GparityWilsonImplF \
GparityWilsonImplD "
COMPACT_WILSON_IMPL_LIST=" \
WilsonImplF \
WilsonImplD "
DWF_IMPL_LIST=" \
WilsonImplF \
WilsonImplD \
@ -40,13 +44,23 @@ EOF
done
CC_LIST="WilsonCloverFermionInstantiation CompactWilsonCloverFermionInstantiation WilsonFermionInstantiation WilsonKernelsInstantiation WilsonTMFermionInstantiation"
CC_LIST="WilsonCloverFermionInstantiation WilsonFermionInstantiation WilsonKernelsInstantiation WilsonTMFermionInstantiation"
for impl in $WILSON_IMPL_LIST
do
for f in $CC_LIST
do
ln -f -s ../$f.cc.master $impl/$f$impl.cc
ln -f -s ../$f.cc.master $impl/$f$impl.cc
done
done
CC_LIST="CompactWilsonCloverFermionInstantiation"
for impl in $COMPACT_WILSON_IMPL_LIST
do
for f in $CC_LIST
do
ln -f -s ../$f.cc.master $impl/$f$impl.cc
done
done
@ -63,14 +77,14 @@ for impl in $DWF_IMPL_LIST $GDWF_IMPL_LIST
do
for f in $CC_LIST
do
ln -f -s ../$f.cc.master $impl/$f$impl.cc
ln -f -s ../$f.cc.master $impl/$f$impl.cc
done
done
# overwrite the .cc file in Gparity directories
for impl in $GDWF_IMPL_LIST
do
ln -f -s ../WilsonKernelsInstantiationGparity.cc.master $impl/WilsonKernelsInstantiation$impl.cc
ln -f -s ../WilsonKernelsInstantiationGparity.cc.master $impl/WilsonKernelsInstantiation$impl.cc
done
@ -84,7 +98,7 @@ for impl in $STAG_IMPL_LIST
do
for f in $CC_LIST
do
ln -f -s ../$f.cc.master $impl/$f$impl.cc
ln -f -s ../$f.cc.master $impl/$f$impl.cc
done
done

View File

@ -69,11 +69,6 @@ public:
return PeriodicBC::ShiftStaple(Link,mu);
}
//Same as Cshift for periodic BCs
static inline GaugeLinkField CshiftLink(const GaugeLinkField &Link, int mu, int shift){
return PeriodicBC::CshiftLink(Link,mu,shift);
}
static inline bool isPeriodicGaugeField(void) { return true; }
};
@ -115,11 +110,6 @@ public:
return PeriodicBC::CovShiftBackward(Link, mu, field);
}
//If mu is a conjugate BC direction
//Out(x) = U^dag_\mu(x-mu) | x_\mu != 0
// = U^T_\mu(L-1) | x_\mu == 0
//else
//Out(x) = U^dag_\mu(x-mu mod L)
static inline GaugeLinkField
CovShiftIdentityBackward(const GaugeLinkField &Link, int mu)
{
@ -139,13 +129,6 @@ public:
return PeriodicBC::CovShiftIdentityForward(Link,mu);
}
//If mu is a conjugate BC direction
//Out(x) = S_\mu(x+mu) | x_\mu != L-1
// = S*_\mu(x+mu) | x_\mu == L-1
//else
//Out(x) = S_\mu(x+mu mod L)
//Note: While this is used for Staples it is also applicable for shifting gauge links or gauge transformation matrices
static inline GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu)
{
assert(_conjDirs.size() == Nd);
@ -155,27 +138,6 @@ public:
return PeriodicBC::ShiftStaple(Link,mu);
}
//Boundary-aware C-shift of gauge links / gauge transformation matrices
//For conjugate BC direction
//shift = 1
//Out(x) = U_\mu(x+\hat\mu) | x_\mu != L-1
// = U*_\mu(0) | x_\mu == L-1
//shift = -1
//Out(x) = U_\mu(x-mu) | x_\mu != 0
// = U*_\mu(L-1) | x_\mu == 0
//else
//shift = 1
//Out(x) = U_\mu(x+\hat\mu mod L)
//shift = -1
//Out(x) = U_\mu(x-\hat\mu mod L)
static inline GaugeLinkField CshiftLink(const GaugeLinkField &Link, int mu, int shift){
assert(_conjDirs.size() == Nd);
if(_conjDirs[mu])
return ConjugateBC::CshiftLink(Link,mu,shift);
else
return PeriodicBC::CshiftLink(Link,mu,shift);
}
static inline void setDirections(std::vector<int> &conjDirs) { _conjDirs=conjDirs; }
static inline std::vector<int> getDirections(void) { return _conjDirs; }
static inline bool isPeriodicGaugeField(void) { return false; }

View File

@ -49,7 +49,7 @@ NAMESPACE_BEGIN(Grid);
typedef Lattice<SiteLink> LinkField;
typedef Lattice<SiteField> Field;
typedef Field ComplexField;
typedef LinkField ComplexField;
};
typedef QedGImpl<vComplex> QedGImplR;

View File

@ -40,66 +40,13 @@ NAMESPACE_BEGIN(Grid);
X=X-Y;
RealD Nd = norm2(X);
std::cout << "************************* "<<std::endl;
std::cout << " | noise |^2 = "<<Nx<<std::endl;
std::cout << " | (MdagM^-1/2)^2 noise |^2 = "<<Nz<<std::endl;
std::cout << " | MdagM (MdagM^-1/2)^2 noise |^2 = "<<Ny<<std::endl;
std::cout << " | noise - MdagM (MdagM^-1/2)^2 noise |^2 = "<<Nd<<std::endl;
std::cout << " | noise - MdagM (MdagM^-1/2)^2 noise|/|noise| = " << std::sqrt(Nd/Nx) << std::endl;
std::cout << " noise = "<<Nx<<std::endl;
std::cout << " (MdagM^-1/2)^2 noise = "<<Nz<<std::endl;
std::cout << " MdagM (MdagM^-1/2)^2 noise = "<<Ny<<std::endl;
std::cout << " noise - MdagM (MdagM^-1/2)^2 noise = "<<Nd<<std::endl;
std::cout << "************************* "<<std::endl;
assert( (std::sqrt(Nd/Nx)<tol) && " InverseSqrtBoundsCheck ");
}
/* For a HermOp = M^dag M, check the approximation of HermOp^{-1/inv_pow}
by computing |X - HermOp * [ Hermop^{-1/inv_pow} ]^{inv_pow} X| < tol
for noise X (aka GaussNoise).
ApproxNegPow should be the rational approximation for X^{-1/inv_pow}
*/
template<class Field> void InversePowerBoundsCheck(int inv_pow,
int MaxIter,double tol,
LinearOperatorBase<Field> &HermOp,
Field &GaussNoise,
MultiShiftFunction &ApproxNegPow)
{
GridBase *FermionGrid = GaussNoise.Grid();
Field X(FermionGrid);
Field Y(FermionGrid);
Field Z(FermionGrid);
Field tmp1(FermionGrid), tmp2(FermionGrid);
X=GaussNoise;
RealD Nx = norm2(X);
ConjugateGradientMultiShift<Field> msCG(MaxIter,ApproxNegPow);
tmp1 = X;
Field* in = &tmp1;
Field* out = &tmp2;
for(int i=0;i<inv_pow;i++){ //apply [ Hermop^{-1/inv_pow} ]^{inv_pow} X = HermOp^{-1} X
msCG(HermOp, *in, *out); //backwards conventions!
if(i!=inv_pow-1) std::swap(in, out);
}
Z = *out;
RealD Nz = norm2(Z);
HermOp.HermOp(Z,Y);
RealD Ny = norm2(Y);
X=X-Y;
RealD Nd = norm2(X);
std::cout << "************************* "<<std::endl;
std::cout << " | noise |^2 = "<<Nx<<std::endl;
std::cout << " | (MdagM^-1/" << inv_pow << ")^" << inv_pow << " noise |^2 = "<<Nz<<std::endl;
std::cout << " | MdagM (MdagM^-1/" << inv_pow << ")^" << inv_pow << " noise |^2 = "<<Ny<<std::endl;
std::cout << " | noise - MdagM (MdagM^-1/" << inv_pow << ")^" << inv_pow << " noise |^2 = "<<Nd<<std::endl;
std::cout << " | noise - MdagM (MdagM^-1/" << inv_pow << ")^" << inv_pow << " noise |/| noise | = "<<std::sqrt(Nd/Nx)<<std::endl;
std::cout << "************************* "<<std::endl;
assert( (std::sqrt(Nd/Nx)<tol) && " InversePowerBoundsCheck ");
}
NAMESPACE_END(Grid);

View File

@ -44,10 +44,6 @@ NAMESPACE_BEGIN(Grid);
// Exact one flavour implementation of DWF determinant ratio //
///////////////////////////////////////////////////////////////
//Note: using mixed prec CG for the heatbath solver in this action class will not work
// because the L, R operators must have their shift coefficients updated throughout the heatbath step
// You will find that the heatbath solver simply won't converge.
// To use mixed precision here use the ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction variant below
template<class Impl>
class ExactOneFlavourRatioPseudoFermionAction : public Action<typename Impl::GaugeField>
{
@ -61,60 +57,37 @@ NAMESPACE_BEGIN(Grid);
bool use_heatbath_forecasting;
AbstractEOFAFermion<Impl>& Lop; // the basic LH operator
AbstractEOFAFermion<Impl>& Rop; // the basic RH operator
SchurRedBlackDiagMooeeSolve<FermionField> SolverHBL;
SchurRedBlackDiagMooeeSolve<FermionField> SolverHBR;
SchurRedBlackDiagMooeeSolve<FermionField> SolverHB;
SchurRedBlackDiagMooeeSolve<FermionField> SolverL;
SchurRedBlackDiagMooeeSolve<FermionField> SolverR;
SchurRedBlackDiagMooeeSolve<FermionField> DerivativeSolverL;
SchurRedBlackDiagMooeeSolve<FermionField> DerivativeSolverR;
FermionField Phi; // the pseudofermion field for this trajectory
RealD norm2_eta; //|eta|^2 where eta is the random gaussian field used to generate the pseudofermion field
bool initial_action; //true for the first call to S after refresh, for which the identity S = |eta|^2 holds provided the rational approx is good
public:
//Used in the heatbath, refresh the shift coefficients of the L (LorR=0) or R (LorR=1) operator
virtual void heatbathRefreshShiftCoefficients(int LorR, RealD to){
AbstractEOFAFermion<Impl>&op = LorR == 0 ? Lop : Rop;
op.RefreshShiftCoefficients(to);
}
//Use the same solver for L,R in all cases
ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop,
AbstractEOFAFermion<Impl>& _Rop,
OperatorFunction<FermionField>& CG,
Params& p,
bool use_fc=false)
: ExactOneFlavourRatioPseudoFermionAction(_Lop,_Rop,CG,CG,CG,CG,CG,CG,p,use_fc) {};
//Use the same solver for L,R in the heatbath but different solvers elsewhere
: ExactOneFlavourRatioPseudoFermionAction(_Lop,_Rop,CG,CG,CG,CG,CG,p,use_fc) {};
ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop,
AbstractEOFAFermion<Impl>& _Rop,
OperatorFunction<FermionField>& HeatbathCG,
OperatorFunction<FermionField>& ActionCGL, OperatorFunction<FermionField>& ActionCGR,
OperatorFunction<FermionField>& DerivCGL , OperatorFunction<FermionField>& DerivCGR,
Params& p,
bool use_fc=false)
: ExactOneFlavourRatioPseudoFermionAction(_Lop,_Rop,HeatbathCG,HeatbathCG, ActionCGL, ActionCGR, DerivCGL,DerivCGR,p,use_fc) {};
//Use different solvers for L,R in all cases
ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop,
AbstractEOFAFermion<Impl>& _Rop,
OperatorFunction<FermionField>& HeatbathCGL, OperatorFunction<FermionField>& HeatbathCGR,
OperatorFunction<FermionField>& HeatbathCG,
OperatorFunction<FermionField>& ActionCGL, OperatorFunction<FermionField>& ActionCGR,
OperatorFunction<FermionField>& DerivCGL , OperatorFunction<FermionField>& DerivCGR,
Params& p,
bool use_fc=false) :
Lop(_Lop),
Rop(_Rop),
SolverHBL(HeatbathCGL,false,true), SolverHBR(HeatbathCGR,false,true),
SolverHB(HeatbathCG,false,true),
SolverL(ActionCGL, false, true), SolverR(ActionCGR, false, true),
DerivativeSolverL(DerivCGL, false, true), DerivativeSolverR(DerivCGR, false, true),
Phi(_Lop.FermionGrid()),
param(p),
use_heatbath_forecasting(use_fc),
initial_action(false)
use_heatbath_forecasting(use_fc)
{
AlgRemez remez(param.lo, param.hi, param.precision);
@ -124,8 +97,6 @@ NAMESPACE_BEGIN(Grid);
PowerNegHalf.Init(remez, param.tolerance, true);
};
const FermionField &getPhi() const{ return Phi; }
virtual std::string action_name() { return "ExactOneFlavourRatioPseudoFermionAction"; }
virtual std::string LogParameters() {
@ -146,19 +117,6 @@ NAMESPACE_BEGIN(Grid);
else{ for(int s=0; s<Ls; ++s){ axpby_ssp_pminus(out, 0.0, in, 1.0, in, s, s); } }
}
virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) {
// P(eta_o) = e^{- eta_o^dag eta_o}
//
// e^{x^2/2 sig^2} => sig^2 = 0.5.
//
RealD scale = std::sqrt(0.5);
FermionField eta (Lop.FermionGrid());
gaussian(pRNG,eta); eta = eta * scale;
refresh(U,eta);
}
// EOFA heatbath: see Eqn. (29) of arXiv:1706.05843
// We generate a Gaussian noise vector \eta, and then compute
// \Phi = M_{\rm EOFA}^{-1/2} * \eta
@ -166,10 +124,12 @@ NAMESPACE_BEGIN(Grid);
//
// As a check of rational require \Phi^dag M_{EOFA} \Phi == eta^dag M^-1/2^dag M M^-1/2 eta = eta^dag eta
//
void refresh(const GaugeField &U, const FermionField &eta) {
virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG)
{
Lop.ImportGauge(U);
Rop.ImportGauge(U);
FermionField eta (Lop.FermionGrid());
FermionField CG_src (Lop.FermionGrid());
FermionField CG_soln (Lop.FermionGrid());
FermionField Forecast_src(Lop.FermionGrid());
@ -180,6 +140,11 @@ NAMESPACE_BEGIN(Grid);
if(use_heatbath_forecasting){ prev_solns.reserve(param.degree); }
ChronoForecast<AbstractEOFAFermion<Impl>, FermionField> Forecast;
// Seed with Gaussian noise vector (var = 0.5)
RealD scale = std::sqrt(0.5);
gaussian(pRNG,eta);
eta = eta * scale;
// \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta
RealD N(PowerNegHalf.norm);
for(int k=0; k<param.degree; ++k){ N += PowerNegHalf.residues[k] / ( 1.0 + PowerNegHalf.poles[k] ); }
@ -195,16 +160,15 @@ NAMESPACE_BEGIN(Grid);
tmp[1] = Zero();
for(int k=0; k<param.degree; ++k){
gamma_l = 1.0 / ( 1.0 + PowerNegHalf.poles[k] );
heatbathRefreshShiftCoefficients(0, -gamma_l);
//Lop.RefreshShiftCoefficients(-gamma_l);
Lop.RefreshShiftCoefficients(-gamma_l);
if(use_heatbath_forecasting){ // Forecast CG guess using solutions from previous poles
Lop.Mdag(CG_src, Forecast_src);
CG_soln = Forecast(Lop, Forecast_src, prev_solns);
SolverHBL(Lop, CG_src, CG_soln);
SolverHB(Lop, CG_src, CG_soln);
prev_solns.push_back(CG_soln);
} else {
CG_soln = Zero(); // Just use zero as the initial guess
SolverHBL(Lop, CG_src, CG_soln);
SolverHB(Lop, CG_src, CG_soln);
}
Lop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
tmp[1] = tmp[1] + ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Lop.k ) * tmp[0];
@ -223,16 +187,15 @@ NAMESPACE_BEGIN(Grid);
if(use_heatbath_forecasting){ prev_solns.clear(); } // empirically, LH solns don't help for RH solves
for(int k=0; k<param.degree; ++k){
gamma_l = 1.0 / ( 1.0 + PowerNegHalf.poles[k] );
heatbathRefreshShiftCoefficients(1, -gamma_l*PowerNegHalf.poles[k]);
//Rop.RefreshShiftCoefficients(-gamma_l*PowerNegHalf.poles[k]);
Rop.RefreshShiftCoefficients(-gamma_l*PowerNegHalf.poles[k]);
if(use_heatbath_forecasting){
Rop.Mdag(CG_src, Forecast_src);
CG_soln = Forecast(Rop, Forecast_src, prev_solns);
SolverHBR(Rop, CG_src, CG_soln);
SolverHB(Rop, CG_src, CG_soln);
prev_solns.push_back(CG_soln);
} else {
CG_soln = Zero();
SolverHBR(Rop, CG_src, CG_soln);
SolverHB(Rop, CG_src, CG_soln);
}
Rop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
tmp[1] = tmp[1] - ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Rop.k ) * tmp[0];
@ -242,119 +205,49 @@ NAMESPACE_BEGIN(Grid);
Phi = Phi + tmp[1];
// Reset shift coefficients for energy and force evals
//Lop.RefreshShiftCoefficients(0.0);
//Rop.RefreshShiftCoefficients(-1.0);
heatbathRefreshShiftCoefficients(0, 0.0);
heatbathRefreshShiftCoefficients(1, -1.0);
//Mark that the next call to S is the first after refresh
initial_action = true;
Lop.RefreshShiftCoefficients(0.0);
Rop.RefreshShiftCoefficients(-1.0);
// Bounds check
RealD EtaDagEta = norm2(eta);
norm2_eta = EtaDagEta;
// RealD PhiDagMPhi= norm2(eta);
};
void Meofa(const GaugeField& U,const FermionField &in, FermionField & out)
void Meofa(const GaugeField& U,const FermionField &phi, FermionField & Mphi)
{
#if 0
Lop.ImportGauge(U);
Rop.ImportGauge(U);
FermionField spProj_in(Lop.FermionGrid());
FermionField spProj_Phi(Lop.FermionGrid());
FermionField mPhi(Lop.FermionGrid());
std::vector<FermionField> tmp(2, Lop.FermionGrid());
out = in;
mPhi = phi;
// LH term: S = S - k <\Phi| P_{-} \Omega_{-}^{\dagger} H(mf)^{-1} \Omega_{-} P_{-} |\Phi>
spProj(in, spProj_in, -1, Lop.Ls);
Lop.Omega(spProj_in, tmp[0], -1, 0);
spProj(Phi, spProj_Phi, -1, Lop.Ls);
Lop.Omega(spProj_Phi, tmp[0], -1, 0);
G5R5(tmp[1], tmp[0]);
tmp[0] = Zero();
SolverL(Lop, tmp[1], tmp[0]);
Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back
Lop.Omega(tmp[1], tmp[0], -1, 1);
spProj(tmp[0], tmp[1], -1, Lop.Ls);
out = out - Lop.k * tmp[1];
mPhi = mPhi - Lop.k * innerProduct(spProj_Phi, tmp[0]).real();
// RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb)
// - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} |\Phi>
spProj(in, spProj_in, 1, Rop.Ls);
Rop.Omega(spProj_in, tmp[0], 1, 0);
// - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{-} P_{-} |\Phi>
spProj(Phi, spProj_Phi, 1, Rop.Ls);
Rop.Omega(spProj_Phi, tmp[0], 1, 0);
G5R5(tmp[1], tmp[0]);
tmp[0] = Zero();
SolverR(Rop, tmp[1], tmp[0]);
Rop.Dtilde(tmp[0], tmp[1]);
Rop.Omega(tmp[1], tmp[0], 1, 1);
spProj(tmp[0], tmp[1], 1, Rop.Ls);
out = out + Rop.k * tmp[1];
action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real();
#endif
}
//Due to the structure of EOFA, it is no more expensive to compute the inverse of Meofa
//To ensure correctness we can simply reuse the heatbath code but use the rational approx
//f(x) = 1/x which corresponds to alpha_0=0, alpha_1=1, beta_1=0 => gamma_1=1
void MeofaInv(const GaugeField &U, const FermionField &in, FermionField &out) {
Lop.ImportGauge(U);
Rop.ImportGauge(U);
FermionField CG_src (Lop.FermionGrid());
FermionField CG_soln (Lop.FermionGrid());
std::vector<FermionField> tmp(2, Lop.FermionGrid());
// \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta
// = 1 * \eta
out = in;
// LH terms:
// \Phi = \Phi + k \sum_{k=1}^{N_{p}} P_{-} \Omega_{-}^{\dagger} ( H(mf)
// - \gamma_{l} \Delta_{-}(mf,mb) P_{-} )^{-1} \Omega_{-} P_{-} \eta
spProj(in, tmp[0], -1, Lop.Ls);
Lop.Omega(tmp[0], tmp[1], -1, 0);
G5R5(CG_src, tmp[1]);
{
heatbathRefreshShiftCoefficients(0, -1.); //-gamma_1 = -1.
CG_soln = Zero(); // Just use zero as the initial guess
SolverHBL(Lop, CG_src, CG_soln);
Lop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
tmp[1] = Lop.k * tmp[0];
}
Lop.Omega(tmp[1], tmp[0], -1, 1);
spProj(tmp[0], tmp[1], -1, Lop.Ls);
out = out + tmp[1];
// RH terms:
// \Phi = \Phi - k \sum_{k=1}^{N_{p}} P_{+} \Omega_{+}^{\dagger} ( H(mb)
// - \beta_l\gamma_{l} \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} \eta
spProj(in, tmp[0], 1, Rop.Ls);
Rop.Omega(tmp[0], tmp[1], 1, 0);
G5R5(CG_src, tmp[1]);
{
heatbathRefreshShiftCoefficients(1, 0.); //-gamma_1 * beta_1 = 0
CG_soln = Zero();
SolverHBR(Rop, CG_src, CG_soln);
Rop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
tmp[1] = - Rop.k * tmp[0];
}
Rop.Omega(tmp[1], tmp[0], 1, 1);
spProj(tmp[0], tmp[1], 1, Rop.Ls);
out = out + tmp[1];
// Reset shift coefficients for energy and force evals
heatbathRefreshShiftCoefficients(0, 0.0);
heatbathRefreshShiftCoefficients(1, -1.0);
};
// EOFA action: see Eqn. (10) of arXiv:1706.05843
virtual RealD S(const GaugeField& U)
{
@ -378,7 +271,7 @@ NAMESPACE_BEGIN(Grid);
action -= Lop.k * innerProduct(spProj_Phi, tmp[0]).real();
// RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb)
// - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} |\Phi>
// - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{-} P_{-} |\Phi>
spProj(Phi, spProj_Phi, 1, Rop.Ls);
Rop.Omega(spProj_Phi, tmp[0], 1, 0);
G5R5(tmp[1], tmp[0]);
@ -388,26 +281,6 @@ NAMESPACE_BEGIN(Grid);
Rop.Omega(tmp[1], tmp[0], 1, 1);
action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real();
if(initial_action){
//For the first call to S after refresh, S = |eta|^2. We can use this to ensure the rational approx is good
RealD diff = action - norm2_eta;
//S_init = eta^dag M^{-1/2} M M^{-1/2} eta
//S_init - eta^dag eta = eta^dag ( M^{-1/2} M M^{-1/2} - 1 ) eta
//If approximate solution
//S_init - eta^dag eta = eta^dag ( [M^{-1/2}+\delta M^{-1/2}] M [M^{-1/2}+\delta M^{-1/2}] - 1 ) eta
// \approx eta^dag ( \delta M^{-1/2} M^{1/2} + M^{1/2}\delta M^{-1/2} ) eta
// We divide out |eta|^2 to remove source scaling but the tolerance on this check should still be somewhat higher than the actual approx tolerance
RealD test = fabs(diff)/norm2_eta; //test the quality of the rational approx
std::cout << GridLogMessage << action_name() << " initial action " << action << " expect " << norm2_eta << "; diff " << diff << std::endl;
std::cout << GridLogMessage << action_name() << "[ eta^dag ( M^{-1/2} M M^{-1/2} - 1 ) eta ]/|eta^2| = " << test << " expect 0 (tol " << param.BoundsCheckTol << ")" << std::endl;
assert( ( test < param.BoundsCheckTol ) && " Initial action check failed" );
initial_action = false;
}
return action;
};
@ -456,40 +329,6 @@ NAMESPACE_BEGIN(Grid);
};
};
template<class ImplD, class ImplF>
class ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction : public ExactOneFlavourRatioPseudoFermionAction<ImplD>{
public:
INHERIT_IMPL_TYPES(ImplD);
typedef OneFlavourRationalParams Params;
private:
AbstractEOFAFermion<ImplF>& LopF; // the basic LH operator
AbstractEOFAFermion<ImplF>& RopF; // the basic RH operator
public:
virtual std::string action_name() { return "ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction"; }
//Used in the heatbath, refresh the shift coefficients of the L (LorR=0) or R (LorR=1) operator
virtual void heatbathRefreshShiftCoefficients(int LorR, RealD to){
AbstractEOFAFermion<ImplF> &op = LorR == 0 ? LopF : RopF;
op.RefreshShiftCoefficients(to);
this->ExactOneFlavourRatioPseudoFermionAction<ImplD>::heatbathRefreshShiftCoefficients(LorR,to);
}
ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction(AbstractEOFAFermion<ImplF>& _LopF,
AbstractEOFAFermion<ImplF>& _RopF,
AbstractEOFAFermion<ImplD>& _LopD,
AbstractEOFAFermion<ImplD>& _RopD,
OperatorFunction<FermionField>& HeatbathCGL, OperatorFunction<FermionField>& HeatbathCGR,
OperatorFunction<FermionField>& ActionCGL, OperatorFunction<FermionField>& ActionCGR,
OperatorFunction<FermionField>& DerivCGL , OperatorFunction<FermionField>& DerivCGR,
Params& p,
bool use_fc=false) :
LopF(_LopF), RopF(_RopF), ExactOneFlavourRatioPseudoFermionAction<ImplD>(_LopD, _RopD, HeatbathCGL, HeatbathCGR, ActionCGL, ActionCGR, DerivCGL, DerivCGR, p, use_fc){}
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,372 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/pseudofermion/GeneralEvenOddRationalRatio.h
Copyright (C) 2015
Author: Christopher Kelly <ckelly@bnl.gov>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_H
#define QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_H
NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////
// Generic rational approximation for ratios of operators
/////////////////////////////////////////////////////////
/* S_f = -log( det( [M^dag M]/[V^dag V] )^{1/inv_pow} )
= chi^dag ( [M^dag M]/[V^dag V] )^{-1/inv_pow} chi\
= chi^dag ( [V^dag V]^{-1/2} [M^dag M] [V^dag V]^{-1/2} )^{-1/inv_pow} chi\
= chi^dag [V^dag V]^{1/(2*inv_pow)} [M^dag M]^{-1/inv_pow} [V^dag V]^{1/(2*inv_pow)} chi\
S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
BIG WARNING:
Here V^dag V is referred to in this code as the "numerator" operator and M^dag M is the *denominator* operator.
this refers to their position in the pseudofermion action, which is the *inverse* of what appears in the determinant
Thus for DWF the numerator operator is the Pauli-Villars operator
Here P/Q \sim R_{1/(2*inv_pow)} ~ (V^dagV)^{1/(2*inv_pow)}
Here N/D \sim R_{-1/inv_pow} ~ (M^dagM)^{-1/inv_pow}
*/
template<class Impl>
class GeneralEvenOddRatioRationalPseudoFermionAction : public Action<typename Impl::GaugeField> {
public:
INHERIT_IMPL_TYPES(Impl);
typedef RationalActionParams Params;
Params param;
//For action evaluation
MultiShiftFunction ApproxPowerAction ; //rational approx for X^{1/inv_pow}
MultiShiftFunction ApproxNegPowerAction; //rational approx for X^{-1/inv_pow}
MultiShiftFunction ApproxHalfPowerAction; //rational approx for X^{1/(2*inv_pow)}
MultiShiftFunction ApproxNegHalfPowerAction; //rational approx for X^{-1/(2*inv_pow)}
//For the MD integration
MultiShiftFunction ApproxPowerMD ; //rational approx for X^{1/inv_pow}
MultiShiftFunction ApproxNegPowerMD; //rational approx for X^{-1/inv_pow}
MultiShiftFunction ApproxHalfPowerMD; //rational approx for X^{1/(2*inv_pow)}
MultiShiftFunction ApproxNegHalfPowerMD; //rational approx for X^{-1/(2*inv_pow)}
private:
FermionOperator<Impl> & NumOp;// the basic operator
FermionOperator<Impl> & DenOp;// the basic operator
FermionField PhiEven; // the pseudo fermion field for this trajectory
FermionField PhiOdd; // the pseudo fermion field for this trajectory
//Generate the approximation to x^{1/inv_pow} (->approx) and x^{-1/inv_pow} (-> approx_inv) by an approx_degree degree rational approximation
//CG_tolerance is used to issue a warning if the approximation error is larger than the tolerance of the CG and is otherwise just stored in the MultiShiftFunction for use by the multi-shift
static void generateApprox(MultiShiftFunction &approx, MultiShiftFunction &approx_inv, int inv_pow, int approx_degree, double CG_tolerance, AlgRemez &remez){
std::cout<<GridLogMessage << "Generating degree "<< approx_degree<<" approximation for x^(1/" << inv_pow << ")"<<std::endl;
double error = remez.generateApprox(approx_degree,1,inv_pow);
if(error > CG_tolerance)
std::cout<<GridLogMessage << "WARNING: Remez approximation has a larger error " << error << " than the CG tolerance " << CG_tolerance << "! Try increasing the number of poles" << std::endl;
approx.Init(remez, CG_tolerance,false);
approx_inv.Init(remez, CG_tolerance,true);
}
protected:
static constexpr bool Numerator = true;
static constexpr bool Denominator = false;
//Allow derived classes to override the multishift CG
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionField &in, FermionField &out){
SchurDifferentiableOperator<Impl> schurOp(numerator ? NumOp : DenOp);
ConjugateGradientMultiShift<FermionField> msCG(MaxIter, approx);
msCG(schurOp,in, out);
}
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionField &in, std::vector<FermionField> &out_elems, FermionField &out){
SchurDifferentiableOperator<Impl> schurOp(numerator ? NumOp : DenOp);
ConjugateGradientMultiShift<FermionField> msCG(MaxIter, approx);
msCG(schurOp,in, out_elems, out);
}
//Allow derived classes to override the gauge import
virtual void ImportGauge(const GaugeField &U){
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
}
public:
GeneralEvenOddRatioRationalPseudoFermionAction(FermionOperator<Impl> &_NumOp,
FermionOperator<Impl> &_DenOp,
const Params & p
) :
NumOp(_NumOp),
DenOp(_DenOp),
PhiOdd (_NumOp.FermionRedBlackGrid()),
PhiEven(_NumOp.FermionRedBlackGrid()),
param(p)
{
std::cout<<GridLogMessage << action_name() << " initialize: starting" << std::endl;
AlgRemez remez(param.lo,param.hi,param.precision);
//Generate approximations for action eval
generateApprox(ApproxPowerAction, ApproxNegPowerAction, param.inv_pow, param.action_degree, param.action_tolerance, remez);
generateApprox(ApproxHalfPowerAction, ApproxNegHalfPowerAction, 2*param.inv_pow, param.action_degree, param.action_tolerance, remez);
//Generate approximations for MD
if(param.md_degree != param.action_degree){ //note the CG tolerance is unrelated to the stopping condition of the Remez algorithm
generateApprox(ApproxPowerMD, ApproxNegPowerMD, param.inv_pow, param.md_degree, param.md_tolerance, remez);
generateApprox(ApproxHalfPowerMD, ApproxNegHalfPowerMD, 2*param.inv_pow, param.md_degree, param.md_tolerance, remez);
}else{
std::cout<<GridLogMessage << "Using same rational approximations for MD as for action evaluation" << std::endl;
ApproxPowerMD = ApproxPowerAction;
ApproxNegPowerMD = ApproxNegPowerAction;
for(int i=0;i<ApproxPowerMD.tolerances.size();i++)
ApproxNegPowerMD.tolerances[i] = ApproxPowerMD.tolerances[i] = param.md_tolerance; //used for multishift
ApproxHalfPowerMD = ApproxHalfPowerAction;
ApproxNegHalfPowerMD = ApproxNegHalfPowerAction;
for(int i=0;i<ApproxPowerMD.tolerances.size();i++)
ApproxNegHalfPowerMD.tolerances[i] = ApproxHalfPowerMD.tolerances[i] = param.md_tolerance;
}
std::cout<<GridLogMessage << action_name() << " initialize: complete" << std::endl;
};
virtual std::string action_name(){return "GeneralEvenOddRatioRationalPseudoFermionAction";}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "["<<action_name()<<"] Power : 1/" << param.inv_pow << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Low :" << param.lo << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] High :" << param.hi << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Max iterations :" << param.MaxIter << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Tolerance (Action) :" << param.action_tolerance << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Degree (Action) :" << param.action_degree << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Tolerance (MD) :" << param.md_tolerance << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Degree (MD) :" << param.md_degree << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Precision :" << param.precision << std::endl;
return sstream.str();
}
//Access the fermion field
const FermionField &getPhiOdd() const{ return PhiOdd; }
virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) {
std::cout<<GridLogMessage << action_name() << " refresh: starting" << std::endl;
FermionField eta(NumOp.FermionGrid());
// P(eta) \propto e^{- eta^dag eta}
//
// The gaussian function draws from P(x) \propto e^{- x^2 / 2 } [i.e. sigma=1]
// Thus eta = x/sqrt{2} = x * sqrt(1/2)
RealD scale = std::sqrt(0.5);
gaussian(pRNG,eta); eta=eta*scale;
refresh(U,eta);
}
//Allow for manual specification of random field for testing
void refresh(const GaugeField &U, const FermionField &eta) {
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
//
// P(phi) = e^{- phi^dag (VdagV)^1/(2*inv_pow) (MdagM)^-1/inv_pow (VdagV)^1/(2*inv_pow) phi}
// = e^{- phi^dag (VdagV)^1/(2*inv_pow) (MdagM)^-1/(2*inv_pow) (MdagM)^-1/(2*inv_pow) (VdagV)^1/(2*inv_pow) phi}
//
// Phi = (VdagV)^-1/(2*inv_pow) Mdag^{1/(2*inv_pow)} eta
std::cout<<GridLogMessage << action_name() << " refresh: starting" << std::endl;
FermionField etaOdd (NumOp.FermionRedBlackGrid());
FermionField etaEven(NumOp.FermionRedBlackGrid());
FermionField tmp(NumOp.FermionRedBlackGrid());
pickCheckerboard(Even,etaEven,eta);
pickCheckerboard(Odd,etaOdd,eta);
ImportGauge(U);
// MdagM^1/(2*inv_pow) eta
std::cout<<GridLogMessage << action_name() << " refresh: doing (M^dag M)^{1/" << 2*param.inv_pow << "} eta" << std::endl;
multiShiftInverse(Denominator, ApproxHalfPowerAction, param.MaxIter, etaOdd, tmp);
// VdagV^-1/(2*inv_pow) MdagM^1/(2*inv_pow) eta
std::cout<<GridLogMessage << action_name() << " refresh: doing (V^dag V)^{-1/" << 2*param.inv_pow << "} ( (M^dag M)^{1/" << 2*param.inv_pow << "} eta)" << std::endl;
multiShiftInverse(Numerator, ApproxNegHalfPowerAction, param.MaxIter, tmp, PhiOdd);
assert(NumOp.ConstEE() == 1);
assert(DenOp.ConstEE() == 1);
PhiEven = Zero();
std::cout<<GridLogMessage << action_name() << " refresh: starting" << std::endl;
};
//////////////////////////////////////////////////////
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
//////////////////////////////////////////////////////
virtual RealD S(const GaugeField &U) {
std::cout<<GridLogMessage << action_name() << " compute action: starting" << std::endl;
ImportGauge(U);
FermionField X(NumOp.FermionRedBlackGrid());
FermionField Y(NumOp.FermionRedBlackGrid());
// VdagV^1/(2*inv_pow) Phi
std::cout<<GridLogMessage << action_name() << " compute action: doing (V^dag V)^{1/" << 2*param.inv_pow << "} Phi" << std::endl;
multiShiftInverse(Numerator, ApproxHalfPowerAction, param.MaxIter, PhiOdd,X);
// MdagM^-1/(2*inv_pow) VdagV^1/(2*inv_pow) Phi
std::cout<<GridLogMessage << action_name() << " compute action: doing (M^dag M)^{-1/" << 2*param.inv_pow << "} ( (V^dag V)^{1/" << 2*param.inv_pow << "} Phi)" << std::endl;
multiShiftInverse(Denominator, ApproxNegHalfPowerAction, param.MaxIter, X,Y);
// Randomly apply rational bounds checks.
int rcheck = rand();
auto grid = NumOp.FermionGrid();
auto r=rand();
grid->Broadcast(0,r);
if ( param.BoundsCheckFreq != 0 && (r % param.BoundsCheckFreq)==0 ) {
std::cout<<GridLogMessage << action_name() << " compute action: doing bounds check" << std::endl;
FermionField gauss(NumOp.FermionRedBlackGrid());
gauss = PhiOdd;
SchurDifferentiableOperator<Impl> MdagM(DenOp);
std::cout<<GridLogMessage << action_name() << " compute action: checking high bounds" << std::endl;
HighBoundCheck(MdagM,gauss,param.hi);
std::cout<<GridLogMessage << action_name() << " compute action: full approximation" << std::endl;
InversePowerBoundsCheck(param.inv_pow,param.MaxIter,param.action_tolerance*100,MdagM,gauss,ApproxNegPowerAction);
std::cout<<GridLogMessage << action_name() << " compute action: bounds check complete" << std::endl;
}
// Phidag VdagV^1/(2*inv_pow) MdagM^-1/(2*inv_pow) MdagM^-1/(2*inv_pow) VdagV^1/(2*inv_pow) Phi
RealD action = norm2(Y);
std::cout<<GridLogMessage << action_name() << " compute action: complete" << std::endl;
return action;
};
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
//
// Here, M is some 5D operator and V is the Pauli-Villars field
// N and D makeup the rat. poly of the M term and P and & makeup the rat.poly of the denom term
//
// Need
// dS_f/dU = chi^dag d[P/Q] N/D P/Q chi
// + chi^dag P/Q d[N/D] P/Q chi
// + chi^dag P/Q N/D d[P/Q] chi
//
// P/Q is expressed as partial fraction expansion:
//
// a0 + \sum_k ak/(V^dagV + bk)
//
// d[P/Q] is then
//
// \sum_k -ak [V^dagV+bk]^{-1} [ dV^dag V + V^dag dV ] [V^dag V + bk]^{-1}
//
// and similar for N/D.
//
// Need
// MpvPhi_k = [Vdag V + bk]^{-1} chi
// MpvPhi = {a0 + \sum_k ak [Vdag V + bk]^{-1} }chi
//
// MfMpvPhi_k = [MdagM+bk]^{-1} MpvPhi
// MfMpvPhi = {a0 + \sum_k ak [Mdag M + bk]^{-1} } MpvPhi
//
// MpvMfMpvPhi_k = [Vdag V + bk]^{-1} MfMpvchi
//
virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
std::cout<<GridLogMessage << action_name() << " deriv: starting" << std::endl;
const int n_f = ApproxNegPowerMD.poles.size();
const int n_pv = ApproxHalfPowerMD.poles.size();
std::vector<FermionField> MpvPhi_k (n_pv,NumOp.FermionRedBlackGrid());
std::vector<FermionField> MpvMfMpvPhi_k(n_pv,NumOp.FermionRedBlackGrid());
std::vector<FermionField> MfMpvPhi_k (n_f ,NumOp.FermionRedBlackGrid());
FermionField MpvPhi(NumOp.FermionRedBlackGrid());
FermionField MfMpvPhi(NumOp.FermionRedBlackGrid());
FermionField MpvMfMpvPhi(NumOp.FermionRedBlackGrid());
FermionField Y(NumOp.FermionRedBlackGrid());
GaugeField tmp(NumOp.GaugeGrid());
ImportGauge(U);
std::cout<<GridLogMessage << action_name() << " deriv: doing (V^dag V)^{1/" << 2*param.inv_pow << "} Phi" << std::endl;
multiShiftInverse(Numerator, ApproxHalfPowerMD, param.MaxIter, PhiOdd,MpvPhi_k,MpvPhi);
std::cout<<GridLogMessage << action_name() << " deriv: doing (M^dag M)^{-1/" << param.inv_pow << "} ( (V^dag V)^{1/" << 2*param.inv_pow << "} Phi)" << std::endl;
multiShiftInverse(Denominator, ApproxNegPowerMD, param.MaxIter, MpvPhi,MfMpvPhi_k,MfMpvPhi);
std::cout<<GridLogMessage << action_name() << " deriv: doing (V^dag V)^{1/" << 2*param.inv_pow << "} ( (M^dag M)^{-1/" << param.inv_pow << "} (V^dag V)^{1/" << 2*param.inv_pow << "} Phi)" << std::endl;
multiShiftInverse(Numerator, ApproxHalfPowerMD, param.MaxIter, MfMpvPhi,MpvMfMpvPhi_k,MpvMfMpvPhi);
SchurDifferentiableOperator<Impl> MdagM(DenOp);
SchurDifferentiableOperator<Impl> VdagV(NumOp);
RealD ak;
dSdU = Zero();
// With these building blocks
//
// dS/dU =
// \sum_k -ak MfMpvPhi_k^dag [ dM^dag M + M^dag dM ] MfMpvPhi_k (1)
// + \sum_k -ak MpvMfMpvPhi_k^\dag [ dV^dag V + V^dag dV ] MpvPhi_k (2)
// -ak MpvPhi_k^dag [ dV^dag V + V^dag dV ] MpvMfMpvPhi_k (3)
//(1)
std::cout<<GridLogMessage << action_name() << " deriv: doing dS/dU part (1)" << std::endl;
for(int k=0;k<n_f;k++){
ak = ApproxNegPowerMD.residues[k];
MdagM.Mpc(MfMpvPhi_k[k],Y);
MdagM.MpcDagDeriv(tmp , MfMpvPhi_k[k], Y ); dSdU=dSdU+ak*tmp;
MdagM.MpcDeriv(tmp , Y, MfMpvPhi_k[k] ); dSdU=dSdU+ak*tmp;
}
//(2)
//(3)
std::cout<<GridLogMessage << action_name() << " deriv: doing dS/dU part (2)+(3)" << std::endl;
for(int k=0;k<n_pv;k++){
ak = ApproxHalfPowerMD.residues[k];
VdagV.Mpc(MpvPhi_k[k],Y);
VdagV.MpcDagDeriv(tmp,MpvMfMpvPhi_k[k],Y); dSdU=dSdU+ak*tmp;
VdagV.MpcDeriv (tmp,Y,MpvMfMpvPhi_k[k]); dSdU=dSdU+ak*tmp;
VdagV.Mpc(MpvMfMpvPhi_k[k],Y); // V as we take Ydag
VdagV.MpcDeriv (tmp,Y, MpvPhi_k[k]); dSdU=dSdU+ak*tmp;
VdagV.MpcDagDeriv(tmp,MpvPhi_k[k], Y); dSdU=dSdU+ak*tmp;
}
//dSdU = Ta(dSdU);
std::cout<<GridLogMessage << action_name() << " deriv: complete" << std::endl;
};
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,93 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/pseudofermion/GeneralEvenOddRationalRatioMixedPrec.h
Copyright (C) 2015
Author: Christopher Kelly <ckelly@bnl.gov>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_MIXED_PREC_H
#define QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_MIXED_PREC_H
NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Generic rational approximation for ratios of operators utilizing the mixed precision multishift algorithm
// cf. GeneralEvenOddRational.h for details
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<class ImplD, class ImplF>
class GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction : public GeneralEvenOddRatioRationalPseudoFermionAction<ImplD> {
private:
typedef typename ImplD::FermionField FermionFieldD;
typedef typename ImplF::FermionField FermionFieldF;
FermionOperator<ImplD> & NumOpD;
FermionOperator<ImplD> & DenOpD;
FermionOperator<ImplF> & NumOpF;
FermionOperator<ImplF> & DenOpF;
Integer ReliableUpdateFreq;
protected:
//Allow derived classes to override the multishift CG
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, FermionFieldD &out){
SchurDifferentiableOperator<ImplD> schurOpD(numerator ? NumOpD : DenOpD);
SchurDifferentiableOperator<ImplF> schurOpF(numerator ? NumOpF : DenOpF);
ConjugateGradientMultiShiftMixedPrec<FermionFieldD, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
msCG(schurOpD, in, out);
}
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, std::vector<FermionFieldD> &out_elems, FermionFieldD &out){
SchurDifferentiableOperator<ImplD> schurOpD(numerator ? NumOpD : DenOpD);
SchurDifferentiableOperator<ImplF> schurOpF(numerator ? NumOpF : DenOpF);
ConjugateGradientMultiShiftMixedPrec<FermionFieldD, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
msCG(schurOpD, in, out_elems, out);
}
//Allow derived classes to override the gauge import
virtual void ImportGauge(const typename ImplD::GaugeField &Ud){
typename ImplF::GaugeField Uf(NumOpF.GaugeGrid());
precisionChange(Uf, Ud);
NumOpD.ImportGauge(Ud);
DenOpD.ImportGauge(Ud);
NumOpF.ImportGauge(Uf);
DenOpF.ImportGauge(Uf);
}
public:
GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction(FermionOperator<ImplD> &_NumOpD, FermionOperator<ImplD> &_DenOpD,
FermionOperator<ImplF> &_NumOpF, FermionOperator<ImplF> &_DenOpF,
const RationalActionParams & p, Integer _ReliableUpdateFreq
) : GeneralEvenOddRatioRationalPseudoFermionAction<ImplD>(_NumOpD, _DenOpD, p),
ReliableUpdateFreq(_ReliableUpdateFreq), NumOpD(_NumOpD), DenOpD(_DenOpD), NumOpF(_NumOpF), DenOpF(_DenOpF){}
virtual std::string action_name(){return "GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction";}
};
NAMESPACE_END(Grid);
#endif

View File

@ -40,31 +40,249 @@ NAMESPACE_BEGIN(Grid);
// Here N/D \sim R_{-1/2} ~ (M^dagM)^{-1/2}
template<class Impl>
class OneFlavourEvenOddRatioRationalPseudoFermionAction : public GeneralEvenOddRatioRationalPseudoFermionAction<Impl> {
class OneFlavourEvenOddRatioRationalPseudoFermionAction : public Action<typename Impl::GaugeField> {
public:
INHERIT_IMPL_TYPES(Impl);
typedef OneFlavourRationalParams Params;
Params param;
MultiShiftFunction PowerHalf ;
MultiShiftFunction PowerNegHalf;
MultiShiftFunction PowerQuarter;
MultiShiftFunction PowerNegQuarter;
private:
static RationalActionParams transcribe(const Params &in){
RationalActionParams out;
out.inv_pow = 2;
out.lo = in.lo;
out.hi = in.hi;
out.MaxIter = in.MaxIter;
out.action_tolerance = out.md_tolerance = in.tolerance;
out.action_degree = out.md_degree = in.degree;
out.precision = in.precision;
out.BoundsCheckFreq = in.BoundsCheckFreq;
return out;
}
FermionOperator<Impl> & NumOp;// the basic operator
FermionOperator<Impl> & DenOp;// the basic operator
FermionField PhiEven; // the pseudo fermion field for this trajectory
FermionField PhiOdd; // the pseudo fermion field for this trajectory
public:
OneFlavourEvenOddRatioRationalPseudoFermionAction(FermionOperator<Impl> &_NumOp,
FermionOperator<Impl> &_DenOp,
const Params & p
) :
GeneralEvenOddRatioRationalPseudoFermionAction<Impl>(_NumOp, _DenOp, transcribe(p)){}
virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}
OneFlavourEvenOddRatioRationalPseudoFermionAction(FermionOperator<Impl> &_NumOp,
FermionOperator<Impl> &_DenOp,
Params & p
) :
NumOp(_NumOp),
DenOp(_DenOp),
PhiOdd (_NumOp.FermionRedBlackGrid()),
PhiEven(_NumOp.FermionRedBlackGrid()),
param(p)
{
AlgRemez remez(param.lo,param.hi,param.precision);
// MdagM^(+- 1/2)
std::cout<<GridLogMessage << "Generating degree "<<param.degree<<" for x^(1/2)"<<std::endl;
remez.generateApprox(param.degree,1,2);
PowerHalf.Init(remez,param.tolerance,false);
PowerNegHalf.Init(remez,param.tolerance,true);
// MdagM^(+- 1/4)
std::cout<<GridLogMessage << "Generating degree "<<param.degree<<" for x^(1/4)"<<std::endl;
remez.generateApprox(param.degree,1,4);
PowerQuarter.Init(remez,param.tolerance,false);
PowerNegQuarter.Init(remez,param.tolerance,true);
};
virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "["<<action_name()<<"] Low :" << param.lo << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] High :" << param.hi << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Max iterations :" << param.MaxIter << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Tolerance :" << param.tolerance << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Degree :" << param.degree << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Precision :" << param.precision << std::endl;
return sstream.str();
}
virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) {
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
//
// P(phi) = e^{- phi^dag (VdagV)^1/4 (MdagM)^-1/2 (VdagV)^1/4 phi}
// = e^{- phi^dag (VdagV)^1/4 (MdagM)^-1/4 (MdagM)^-1/4 (VdagV)^1/4 phi}
//
// Phi = (VdagV)^-1/4 Mdag^{1/4} eta
//
// P(eta) = e^{- eta^dag eta}
//
// e^{x^2/2 sig^2} => sig^2 = 0.5.
//
// So eta should be of width sig = 1/sqrt(2).
RealD scale = std::sqrt(0.5);
FermionField eta(NumOp.FermionGrid());
FermionField etaOdd (NumOp.FermionRedBlackGrid());
FermionField etaEven(NumOp.FermionRedBlackGrid());
FermionField tmp(NumOp.FermionRedBlackGrid());
gaussian(pRNG,eta); eta=eta*scale;
pickCheckerboard(Even,etaEven,eta);
pickCheckerboard(Odd,etaOdd,eta);
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
// MdagM^1/4 eta
SchurDifferentiableOperator<Impl> MdagM(DenOp);
ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,PowerQuarter);
msCG_M(MdagM,etaOdd,tmp);
// VdagV^-1/4 MdagM^1/4 eta
SchurDifferentiableOperator<Impl> VdagV(NumOp);
ConjugateGradientMultiShift<FermionField> msCG_V(param.MaxIter,PowerNegQuarter);
msCG_V(VdagV,tmp,PhiOdd);
assert(NumOp.ConstEE() == 1);
assert(DenOp.ConstEE() == 1);
PhiEven = Zero();
};
//////////////////////////////////////////////////////
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
//////////////////////////////////////////////////////
virtual RealD S(const GaugeField &U) {
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
FermionField X(NumOp.FermionRedBlackGrid());
FermionField Y(NumOp.FermionRedBlackGrid());
// VdagV^1/4 Phi
SchurDifferentiableOperator<Impl> VdagV(NumOp);
ConjugateGradientMultiShift<FermionField> msCG_V(param.MaxIter,PowerQuarter);
msCG_V(VdagV,PhiOdd,X);
// MdagM^-1/4 VdagV^1/4 Phi
SchurDifferentiableOperator<Impl> MdagM(DenOp);
ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,PowerNegQuarter);
msCG_M(MdagM,X,Y);
// Randomly apply rational bounds checks.
auto grid = NumOp.FermionGrid();
auto r=rand();
grid->Broadcast(0,r);
if ( (r%param.BoundsCheckFreq)==0 ) {
FermionField gauss(NumOp.FermionRedBlackGrid());
gauss = PhiOdd;
HighBoundCheck(MdagM,gauss,param.hi);
InverseSqrtBoundsCheck(param.MaxIter,param.tolerance*100,MdagM,gauss,PowerNegHalf);
}
// Phidag VdagV^1/4 MdagM^-1/4 MdagM^-1/4 VdagV^1/4 Phi
RealD action = norm2(Y);
return action;
};
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
//
// Here, M is some 5D operator and V is the Pauli-Villars field
// N and D makeup the rat. poly of the M term and P and & makeup the rat.poly of the denom term
//
// Need
// dS_f/dU = chi^dag d[P/Q] N/D P/Q chi
// + chi^dag P/Q d[N/D] P/Q chi
// + chi^dag P/Q N/D d[P/Q] chi
//
// P/Q is expressed as partial fraction expansion:
//
// a0 + \sum_k ak/(V^dagV + bk)
//
// d[P/Q] is then
//
// \sum_k -ak [V^dagV+bk]^{-1} [ dV^dag V + V^dag dV ] [V^dag V + bk]^{-1}
//
// and similar for N/D.
//
// Need
// MpvPhi_k = [Vdag V + bk]^{-1} chi
// MpvPhi = {a0 + \sum_k ak [Vdag V + bk]^{-1} }chi
//
// MfMpvPhi_k = [MdagM+bk]^{-1} MpvPhi
// MfMpvPhi = {a0 + \sum_k ak [Mdag M + bk]^{-1} } MpvPhi
//
// MpvMfMpvPhi_k = [Vdag V + bk]^{-1} MfMpvchi
//
virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
const int n_f = PowerNegHalf.poles.size();
const int n_pv = PowerQuarter.poles.size();
std::vector<FermionField> MpvPhi_k (n_pv,NumOp.FermionRedBlackGrid());
std::vector<FermionField> MpvMfMpvPhi_k(n_pv,NumOp.FermionRedBlackGrid());
std::vector<FermionField> MfMpvPhi_k (n_f ,NumOp.FermionRedBlackGrid());
FermionField MpvPhi(NumOp.FermionRedBlackGrid());
FermionField MfMpvPhi(NumOp.FermionRedBlackGrid());
FermionField MpvMfMpvPhi(NumOp.FermionRedBlackGrid());
FermionField Y(NumOp.FermionRedBlackGrid());
GaugeField tmp(NumOp.GaugeGrid());
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
SchurDifferentiableOperator<Impl> VdagV(NumOp);
SchurDifferentiableOperator<Impl> MdagM(DenOp);
ConjugateGradientMultiShift<FermionField> msCG_V(param.MaxIter,PowerQuarter);
ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,PowerNegHalf);
msCG_V(VdagV,PhiOdd,MpvPhi_k,MpvPhi);
msCG_M(MdagM,MpvPhi,MfMpvPhi_k,MfMpvPhi);
msCG_V(VdagV,MfMpvPhi,MpvMfMpvPhi_k,MpvMfMpvPhi);
RealD ak;
dSdU = Zero();
// With these building blocks
//
// dS/dU =
// \sum_k -ak MfMpvPhi_k^dag [ dM^dag M + M^dag dM ] MfMpvPhi_k (1)
// + \sum_k -ak MpvMfMpvPhi_k^\dag [ dV^dag V + V^dag dV ] MpvPhi_k (2)
// -ak MpvPhi_k^dag [ dV^dag V + V^dag dV ] MpvMfMpvPhi_k (3)
//(1)
for(int k=0;k<n_f;k++){
ak = PowerNegHalf.residues[k];
MdagM.Mpc(MfMpvPhi_k[k],Y);
MdagM.MpcDagDeriv(tmp , MfMpvPhi_k[k], Y ); dSdU=dSdU+ak*tmp;
MdagM.MpcDeriv(tmp , Y, MfMpvPhi_k[k] ); dSdU=dSdU+ak*tmp;
}
//(2)
//(3)
for(int k=0;k<n_pv;k++){
ak = PowerQuarter.residues[k];
VdagV.Mpc(MpvPhi_k[k],Y);
VdagV.MpcDagDeriv(tmp,MpvMfMpvPhi_k[k],Y); dSdU=dSdU+ak*tmp;
VdagV.MpcDeriv (tmp,Y,MpvMfMpvPhi_k[k]); dSdU=dSdU+ak*tmp;
VdagV.Mpc(MpvMfMpvPhi_k[k],Y); // V as we take Ydag
VdagV.MpcDeriv (tmp,Y, MpvPhi_k[k]); dSdU=dSdU+ak*tmp;
VdagV.MpcDagDeriv(tmp,MpvPhi_k[k], Y); dSdU=dSdU+ak*tmp;
}
//dSdU = Ta(dSdU);
};
};
NAMESPACE_END(Grid);

View File

@ -40,8 +40,6 @@ directory
#include <Grid/qcd/action/pseudofermion/OneFlavourRational.h>
#include <Grid/qcd/action/pseudofermion/OneFlavourRationalRatio.h>
#include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRational.h>
#include <Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatio.h>
#include <Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatioMixedPrec.h>
#include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h>
#include <Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h>

View File

@ -83,10 +83,16 @@ NAMESPACE_BEGIN(Grid);
return sstream.str();
}
//Access the fermion field
const FermionField &getPhiOdd() const{ return PhiOdd; }
virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) {
// P(phi) = e^{- phi^dag Vpc (MpcdagMpc)^-1 Vpcdag phi}
//
// NumOp == V
// DenOp == M
//
// Take phi_o = Vpcdag^{-1} Mpcdag eta_o ; eta_o = Mpcdag^{-1} Vpcdag Phi
//
// P(eta_o) = e^{- eta_o^dag eta_o}
//
// e^{x^2/2 sig^2} => sig^2 = 0.5.
@ -94,22 +100,12 @@ NAMESPACE_BEGIN(Grid);
RealD scale = std::sqrt(0.5);
FermionField eta (NumOp.FermionGrid());
gaussian(pRNG,eta); eta = eta * scale;
refresh(U,eta);
}
void refresh(const GaugeField &U, const FermionField &eta) {
// P(phi) = e^{- phi^dag Vpc (MpcdagMpc)^-1 Vpcdag phi}
//
// NumOp == V
// DenOp == M
//
// Take phi_o = Vpcdag^{-1} Mpcdag eta_o ; eta_o = Mpcdag^{-1} Vpcdag Phi
FermionField etaOdd (NumOp.FermionRedBlackGrid());
FermionField etaEven(NumOp.FermionRedBlackGrid());
FermionField tmp (NumOp.FermionRedBlackGrid());
gaussian(pRNG,eta);
pickCheckerboard(Even,etaEven,eta);
pickCheckerboard(Odd,etaOdd,eta);
@ -129,8 +125,8 @@ NAMESPACE_BEGIN(Grid);
DenOp.MooeeDag(etaEven,tmp);
NumOp.MooeeInvDag(tmp,PhiEven);
//PhiOdd =PhiOdd*scale;
//PhiEven=PhiEven*scale;
PhiOdd =PhiOdd*scale;
PhiEven=PhiEven*scale;
};

View File

@ -1,6 +0,0 @@
#ifndef GRID_GPARITY_H_
#define GRID_GPARITY_H_
#include<Grid/qcd/gparity/GparityFlavour.h>
#endif

View File

@ -1,34 +0,0 @@
#include <Grid/Grid.h>
NAMESPACE_BEGIN(Grid);
const std::array<const GparityFlavour, 3> GparityFlavour::sigma_mu = {{
GparityFlavour(GparityFlavour::Algebra::SigmaX),
GparityFlavour(GparityFlavour::Algebra::SigmaY),
GparityFlavour(GparityFlavour::Algebra::SigmaZ)
}};
const std::array<const GparityFlavour, 6> GparityFlavour::sigma_all = {{
GparityFlavour(GparityFlavour::Algebra::Identity),
GparityFlavour(GparityFlavour::Algebra::SigmaX),
GparityFlavour(GparityFlavour::Algebra::SigmaY),
GparityFlavour(GparityFlavour::Algebra::SigmaZ),
GparityFlavour(GparityFlavour::Algebra::ProjPlus),
GparityFlavour(GparityFlavour::Algebra::ProjMinus)
}};
const std::array<const char *, GparityFlavour::nSigma> GparityFlavour::name = {{
"SigmaX",
"MinusSigmaX",
"SigmaY",
"MinusSigmaY",
"SigmaZ",
"MinusSigmaZ",
"Identity",
"MinusIdentity",
"ProjPlus",
"MinusProjPlus",
"ProjMinus",
"MinusProjMinus"}};
NAMESPACE_END(Grid);

View File

@ -1,475 +0,0 @@
#ifndef GRID_QCD_GPARITY_FLAVOUR_H
#define GRID_QCD_GPARITY_FLAVOUR_H
//Support for flavour-matrix operations acting on the G-parity flavour index
#include <array>
NAMESPACE_BEGIN(Grid);
class GparityFlavour {
public:
GRID_SERIALIZABLE_ENUM(Algebra, undef,
SigmaX, 0,
MinusSigmaX, 1,
SigmaY, 2,
MinusSigmaY, 3,
SigmaZ, 4,
MinusSigmaZ, 5,
Identity, 6,
MinusIdentity, 7,
ProjPlus, 8,
MinusProjPlus, 9,
ProjMinus, 10,
MinusProjMinus, 11
);
static constexpr unsigned int nSigma = 12;
static const std::array<const char *, nSigma> name;
static const std::array<const GparityFlavour, 3> sigma_mu;
static const std::array<const GparityFlavour, 6> sigma_all;
Algebra g;
public:
accelerator GparityFlavour(Algebra initg): g(initg) {}
};
// 0 1 x vector
// 1 0
template<class vtype>
accelerator_inline void multFlavourSigmaX(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = rhs(1);
ret(1) = rhs(0);
};
template<class vtype>
accelerator_inline void lmultFlavourSigmaX(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = rhs(1,0);
ret(0,1) = rhs(1,1);
ret(1,0) = rhs(0,0);
ret(1,1) = rhs(0,1);
};
template<class vtype>
accelerator_inline void rmultFlavourSigmaX(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = rhs(0,1);
ret(0,1) = rhs(0,0);
ret(1,0) = rhs(1,1);
ret(1,1) = rhs(1,0);
};
template<class vtype>
accelerator_inline void multFlavourMinusSigmaX(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = -rhs(1);
ret(1) = -rhs(0);
};
template<class vtype>
accelerator_inline void lmultFlavourMinusSigmaX(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -rhs(1,0);
ret(0,1) = -rhs(1,1);
ret(1,0) = -rhs(0,0);
ret(1,1) = -rhs(0,1);
};
template<class vtype>
accelerator_inline void rmultFlavourMinusSigmaX(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -rhs(0,1);
ret(0,1) = -rhs(0,0);
ret(1,0) = -rhs(1,1);
ret(1,1) = -rhs(1,0);
};
// 0 -i x vector
// i 0
template<class vtype>
accelerator_inline void multFlavourSigmaY(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = timesMinusI(rhs(1));
ret(1) = timesI(rhs(0));
};
template<class vtype>
accelerator_inline void lmultFlavourSigmaY(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = timesMinusI(rhs(1,0));
ret(0,1) = timesMinusI(rhs(1,1));
ret(1,0) = timesI(rhs(0,0));
ret(1,1) = timesI(rhs(0,1));
};
template<class vtype>
accelerator_inline void rmultFlavourSigmaY(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = timesI(rhs(0,1));
ret(0,1) = timesMinusI(rhs(0,0));
ret(1,0) = timesI(rhs(1,1));
ret(1,1) = timesMinusI(rhs(1,0));
};
template<class vtype>
accelerator_inline void multFlavourMinusSigmaY(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = timesI(rhs(1));
ret(1) = timesMinusI(rhs(0));
};
template<class vtype>
accelerator_inline void lmultFlavourMinusSigmaY(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = timesI(rhs(1,0));
ret(0,1) = timesI(rhs(1,1));
ret(1,0) = timesMinusI(rhs(0,0));
ret(1,1) = timesMinusI(rhs(0,1));
};
template<class vtype>
accelerator_inline void rmultFlavourMinusSigmaY(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = timesMinusI(rhs(0,1));
ret(0,1) = timesI(rhs(0,0));
ret(1,0) = timesMinusI(rhs(1,1));
ret(1,1) = timesI(rhs(1,0));
};
// 1 0 x vector
// 0 -1
template<class vtype>
accelerator_inline void multFlavourSigmaZ(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = rhs(0);
ret(1) = -rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourSigmaZ(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = rhs(0,0);
ret(0,1) = rhs(0,1);
ret(1,0) = -rhs(1,0);
ret(1,1) = -rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourSigmaZ(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = rhs(0,0);
ret(0,1) = -rhs(0,1);
ret(1,0) = rhs(1,0);
ret(1,1) = -rhs(1,1);
};
template<class vtype>
accelerator_inline void multFlavourMinusSigmaZ(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = -rhs(0);
ret(1) = rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourMinusSigmaZ(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -rhs(0,0);
ret(0,1) = -rhs(0,1);
ret(1,0) = rhs(1,0);
ret(1,1) = rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourMinusSigmaZ(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -rhs(0,0);
ret(0,1) = rhs(0,1);
ret(1,0) = -rhs(1,0);
ret(1,1) = rhs(1,1);
};
template<class vtype>
accelerator_inline void multFlavourIdentity(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = rhs(0);
ret(1) = rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourIdentity(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = rhs(0,0);
ret(0,1) = rhs(0,1);
ret(1,0) = rhs(1,0);
ret(1,1) = rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourIdentity(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = rhs(0,0);
ret(0,1) = rhs(0,1);
ret(1,0) = rhs(1,0);
ret(1,1) = rhs(1,1);
};
template<class vtype>
accelerator_inline void multFlavourMinusIdentity(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = -rhs(0);
ret(1) = -rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourMinusIdentity(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -rhs(0,0);
ret(0,1) = -rhs(0,1);
ret(1,0) = -rhs(1,0);
ret(1,1) = -rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourMinusIdentity(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -rhs(0,0);
ret(0,1) = -rhs(0,1);
ret(1,0) = -rhs(1,0);
ret(1,1) = -rhs(1,1);
};
//G-parity flavour projection 1/2(1+\sigma_2)
//1 -i
//i 1
template<class vtype>
accelerator_inline void multFlavourProjPlus(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = 0.5*rhs(0) + 0.5*timesMinusI(rhs(1));
ret(1) = 0.5*timesI(rhs(0)) + 0.5*rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourProjPlus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = 0.5*rhs(0,0) + 0.5*timesMinusI(rhs(1,0));
ret(0,1) = 0.5*rhs(0,1) + 0.5*timesMinusI(rhs(1,1));
ret(1,0) = 0.5*timesI(rhs(0,0)) + 0.5*rhs(1,0);
ret(1,1) = 0.5*timesI(rhs(0,1)) + 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourProjPlus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = 0.5*rhs(0,0) + 0.5*timesI(rhs(0,1));
ret(0,1) = 0.5*timesMinusI(rhs(0,0)) + 0.5*rhs(0,1);
ret(1,0) = 0.5*rhs(1,0) + 0.5*timesI(rhs(1,1));
ret(1,1) = 0.5*timesMinusI(rhs(1,0)) + 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline void multFlavourMinusProjPlus(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = -0.5*rhs(0) + 0.5*timesI(rhs(1));
ret(1) = 0.5*timesMinusI(rhs(0)) - 0.5*rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourMinusProjPlus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -0.5*rhs(0,0) + 0.5*timesI(rhs(1,0));
ret(0,1) = -0.5*rhs(0,1) + 0.5*timesI(rhs(1,1));
ret(1,0) = 0.5*timesMinusI(rhs(0,0)) - 0.5*rhs(1,0);
ret(1,1) = 0.5*timesMinusI(rhs(0,1)) - 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourMinusProjPlus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -0.5*rhs(0,0) + 0.5*timesMinusI(rhs(0,1));
ret(0,1) = 0.5*timesI(rhs(0,0)) - 0.5*rhs(0,1);
ret(1,0) = -0.5*rhs(1,0) + 0.5*timesMinusI(rhs(1,1));
ret(1,1) = 0.5*timesI(rhs(1,0)) - 0.5*rhs(1,1);
};
//G-parity flavour projection 1/2(1-\sigma_2)
//1 i
//-i 1
template<class vtype>
accelerator_inline void multFlavourProjMinus(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = 0.5*rhs(0) + 0.5*timesI(rhs(1));
ret(1) = 0.5*timesMinusI(rhs(0)) + 0.5*rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourProjMinus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = 0.5*rhs(0,0) + 0.5*timesI(rhs(1,0));
ret(0,1) = 0.5*rhs(0,1) + 0.5*timesI(rhs(1,1));
ret(1,0) = 0.5*timesMinusI(rhs(0,0)) + 0.5*rhs(1,0);
ret(1,1) = 0.5*timesMinusI(rhs(0,1)) + 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourProjMinus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = 0.5*rhs(0,0) + 0.5*timesMinusI(rhs(0,1));
ret(0,1) = 0.5*timesI(rhs(0,0)) + 0.5*rhs(0,1);
ret(1,0) = 0.5*rhs(1,0) + 0.5*timesMinusI(rhs(1,1));
ret(1,1) = 0.5*timesI(rhs(1,0)) + 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline void multFlavourMinusProjMinus(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = -0.5*rhs(0) + 0.5*timesMinusI(rhs(1));
ret(1) = 0.5*timesI(rhs(0)) - 0.5*rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourMinusProjMinus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -0.5*rhs(0,0) + 0.5*timesMinusI(rhs(1,0));
ret(0,1) = -0.5*rhs(0,1) + 0.5*timesMinusI(rhs(1,1));
ret(1,0) = 0.5*timesI(rhs(0,0)) - 0.5*rhs(1,0);
ret(1,1) = 0.5*timesI(rhs(0,1)) - 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourMinusProjMinus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -0.5*rhs(0,0) + 0.5*timesI(rhs(0,1));
ret(0,1) = 0.5*timesMinusI(rhs(0,0)) - 0.5*rhs(0,1);
ret(1,0) = -0.5*rhs(1,0) + 0.5*timesI(rhs(1,1));
ret(1,1) = 0.5*timesMinusI(rhs(1,0)) - 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline auto operator*(const GparityFlavour &G, const iVector<vtype, Ngp> &arg)
->typename std::enable_if<matchGridTensorIndex<iVector<vtype, Ngp>, GparityFlavourTensorIndex>::value, iVector<vtype, Ngp>>::type
{
iVector<vtype, Ngp> ret;
switch (G.g)
{
case GparityFlavour::Algebra::SigmaX:
multFlavourSigmaX(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaX:
multFlavourMinusSigmaX(ret, arg); break;
case GparityFlavour::Algebra::SigmaY:
multFlavourSigmaY(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaY:
multFlavourMinusSigmaY(ret, arg); break;
case GparityFlavour::Algebra::SigmaZ:
multFlavourSigmaZ(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaZ:
multFlavourMinusSigmaZ(ret, arg); break;
case GparityFlavour::Algebra::Identity:
multFlavourIdentity(ret, arg); break;
case GparityFlavour::Algebra::MinusIdentity:
multFlavourMinusIdentity(ret, arg); break;
case GparityFlavour::Algebra::ProjPlus:
multFlavourProjPlus(ret, arg); break;
case GparityFlavour::Algebra::MinusProjPlus:
multFlavourMinusProjPlus(ret, arg); break;
case GparityFlavour::Algebra::ProjMinus:
multFlavourProjMinus(ret, arg); break;
case GparityFlavour::Algebra::MinusProjMinus:
multFlavourMinusProjMinus(ret, arg); break;
default: assert(0);
}
return ret;
}
template<class vtype>
accelerator_inline auto operator*(const GparityFlavour &G, const iMatrix<vtype, Ngp> &arg)
->typename std::enable_if<matchGridTensorIndex<iMatrix<vtype, Ngp>, GparityFlavourTensorIndex>::value, iMatrix<vtype, Ngp>>::type
{
iMatrix<vtype, Ngp> ret;
switch (G.g)
{
case GparityFlavour::Algebra::SigmaX:
lmultFlavourSigmaX(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaX:
lmultFlavourMinusSigmaX(ret, arg); break;
case GparityFlavour::Algebra::SigmaY:
lmultFlavourSigmaY(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaY:
lmultFlavourMinusSigmaY(ret, arg); break;
case GparityFlavour::Algebra::SigmaZ:
lmultFlavourSigmaZ(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaZ:
lmultFlavourMinusSigmaZ(ret, arg); break;
case GparityFlavour::Algebra::Identity:
lmultFlavourIdentity(ret, arg); break;
case GparityFlavour::Algebra::MinusIdentity:
lmultFlavourMinusIdentity(ret, arg); break;
case GparityFlavour::Algebra::ProjPlus:
lmultFlavourProjPlus(ret, arg); break;
case GparityFlavour::Algebra::MinusProjPlus:
lmultFlavourMinusProjPlus(ret, arg); break;
case GparityFlavour::Algebra::ProjMinus:
lmultFlavourProjMinus(ret, arg); break;
case GparityFlavour::Algebra::MinusProjMinus:
lmultFlavourMinusProjMinus(ret, arg); break;
default: assert(0);
}
return ret;
}
template<class vtype>
accelerator_inline auto operator*(const iMatrix<vtype, Ngp> &arg, const GparityFlavour &G)
->typename std::enable_if<matchGridTensorIndex<iMatrix<vtype, Ngp>, GparityFlavourTensorIndex>::value, iMatrix<vtype, Ngp>>::type
{
iMatrix<vtype, Ngp> ret;
switch (G.g)
{
case GparityFlavour::Algebra::SigmaX:
rmultFlavourSigmaX(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaX:
rmultFlavourMinusSigmaX(ret, arg); break;
case GparityFlavour::Algebra::SigmaY:
rmultFlavourSigmaY(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaY:
rmultFlavourMinusSigmaY(ret, arg); break;
case GparityFlavour::Algebra::SigmaZ:
rmultFlavourSigmaZ(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaZ:
rmultFlavourMinusSigmaZ(ret, arg); break;
case GparityFlavour::Algebra::Identity:
rmultFlavourIdentity(ret, arg); break;
case GparityFlavour::Algebra::MinusIdentity:
rmultFlavourMinusIdentity(ret, arg); break;
case GparityFlavour::Algebra::ProjPlus:
rmultFlavourProjPlus(ret, arg); break;
case GparityFlavour::Algebra::MinusProjPlus:
rmultFlavourMinusProjPlus(ret, arg); break;
case GparityFlavour::Algebra::ProjMinus:
rmultFlavourProjMinus(ret, arg); break;
case GparityFlavour::Algebra::MinusProjMinus:
rmultFlavourMinusProjMinus(ret, arg); break;
default: assert(0);
}
return ret;
}
NAMESPACE_END(Grid);
#endif // include guard

View File

@ -129,10 +129,18 @@ public:
Runner(S);
}
//Use the checkpointer to initialize the RNGs and the gauge field, writing the resulting gauge field into U.
//This is called automatically by Run but may be useful elsewhere, e.g. for integrator tuning experiments
void initializeGaugeFieldAndRNGs(Field &U){
if(!Resources.haveRNGs()) Resources.AddRNGs();
//////////////////////////////////////////////////////////////////
private:
template <class SmearingPolicy>
void Runner(SmearingPolicy &Smearing) {
auto UGrid = Resources.GetCartesian();
Resources.AddRNGs();
Field U(UGrid);
// Can move this outside?
typedef IntegratorType<SmearingPolicy> TheIntegrator;
TheIntegrator MDynamics(UGrid, Parameters.MD, TheAction, Smearing);
if (Parameters.StartingType == "HotStart") {
// Hot start
@ -151,40 +159,14 @@ public:
Resources.GetCheckPointer()->CheckpointRestore(Parameters.StartTrajectory, U,
Resources.GetSerialRNG(),
Resources.GetParallelRNG());
} else if (Parameters.StartingType == "CheckpointStartReseed") {
// Same as CheckpointRestart but reseed the RNGs using the fixed integer seeding used for ColdStart and HotStart
// Useful for creating new evolution streams from an existing stream
// WARNING: Unfortunately because the checkpointer doesn't presently allow us to separately restore the RNG and gauge fields we have to load
// an existing RNG checkpoint first; make sure one is available and named correctly
Resources.GetCheckPointer()->CheckpointRestore(Parameters.StartTrajectory, U,
Resources.GetSerialRNG(),
Resources.GetParallelRNG());
Resources.SeedFixedIntegers();
} else {
// others
std::cout << GridLogError << "Unrecognized StartingType\n";
std::cout
<< GridLogError
<< "Valid [HotStart, ColdStart, TepidStart, CheckpointStart, CheckpointStartReseed]\n";
<< "Valid [HotStart, ColdStart, TepidStart, CheckpointStart]\n";
exit(1);
}
}
//////////////////////////////////////////////////////////////////
private:
template <class SmearingPolicy>
void Runner(SmearingPolicy &Smearing) {
auto UGrid = Resources.GetCartesian();
Field U(UGrid);
initializeGaugeFieldAndRNGs(U);
typedef IntegratorType<SmearingPolicy> TheIntegrator;
TheIntegrator MDynamics(UGrid, Parameters.MD, TheAction, Smearing);
Smearing.set_Field(U);

View File

@ -115,21 +115,21 @@ private:
random(sRNG, rn_test);
std::cout << GridLogHMC
std::cout << GridLogMessage
<< "--------------------------------------------------\n";
std::cout << GridLogHMC << "exp(-dH) = " << prob
std::cout << GridLogMessage << "exp(-dH) = " << prob
<< " Random = " << rn_test << "\n";
std::cout << GridLogHMC
std::cout << GridLogMessage
<< "Acc. Probability = " << ((prob < 1.0) ? prob : 1.0) << "\n";
if ((prob > 1.0) || (rn_test <= prob)) { // accepted
std::cout << GridLogHMC << "Metropolis_test -- ACCEPTED\n";
std::cout << GridLogHMC
std::cout << GridLogMessage << "Metropolis_test -- ACCEPTED\n";
std::cout << GridLogMessage
<< "--------------------------------------------------\n";
return true;
} else { // rejected
std::cout << GridLogHMC << "Metropolis_test -- REJECTED\n";
std::cout << GridLogHMC
std::cout << GridLogMessage << "Metropolis_test -- REJECTED\n";
std::cout << GridLogMessage
<< "--------------------------------------------------\n";
return false;
}
@ -145,7 +145,7 @@ private:
std::streamsize current_precision = std::cout.precision();
std::cout.precision(15);
std::cout << GridLogHMC << "Total H before trajectory = " << H0 << "\n";
std::cout << GridLogMessage << "Total H before trajectory = " << H0 << "\n";
std::cout.precision(current_precision);
TheIntegrator.integrate(U);
@ -165,7 +165,7 @@ private:
std::cout.precision(15);
std::cout << GridLogHMC << "Total H after trajectory = " << H1
std::cout << GridLogMessage << "Total H after trajectory = " << H1
<< " dH = " << H1 - H0 << "\n";
std::cout.precision(current_precision);
@ -196,9 +196,9 @@ public:
// Actual updates (evolve a copy Ucopy then copy back eventually)
unsigned int FinalTrajectory = Params.Trajectories + Params.NoMetropolisUntil + Params.StartTrajectory;
for (int traj = Params.StartTrajectory; traj < FinalTrajectory; ++traj) {
std::cout << GridLogHMC << "-- # Trajectory = " << traj << "\n";
std::cout << GridLogMessage << "-- # Trajectory = " << traj << "\n";
if (traj < Params.StartTrajectory + Params.NoMetropolisUntil) {
std::cout << GridLogHMC << "-- Thermalization" << std::endl;
std::cout << GridLogMessage << "-- Thermalization" << std::endl;
}
double t0=usecond();
@ -207,10 +207,10 @@ public:
DeltaH = evolve_hmc_step(Ucopy);
// Metropolis-Hastings test
bool accept = true;
if (Params.MetropolisTest && traj >= Params.StartTrajectory + Params.NoMetropolisUntil) {
if (traj >= Params.StartTrajectory + Params.NoMetropolisUntil) {
accept = metropolis_test(DeltaH);
} else {
std::cout << GridLogHMC << "Skipping Metropolis test" << std::endl;
std::cout << GridLogMessage << "Skipping Metropolis test" << std::endl;
}
if (accept)
@ -219,7 +219,7 @@ public:
double t1=usecond();
std::cout << GridLogHMC << "Total time for trajectory (s): " << (t1-t0)/1e6 << std::endl;
std::cout << GridLogMessage << "Total time for trajectory (s): " << (t1-t0)/1e6 << std::endl;
for (int obs = 0; obs < Observables.size(); obs++) {
@ -228,7 +228,7 @@ public:
std::cout << GridLogDebug << "Observables pointer " << Observables[obs] << std::endl;
Observables[obs]->TrajectoryComplete(traj + 1, Ucur, sRNG, pRNG);
}
std::cout << GridLogHMC << ":::::::::::::::::::::::::::::::::::::::::::" << std::endl;
std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::::" << std::endl;
}
}

View File

@ -80,9 +80,7 @@ public:
std::cout << GridLogError << "Seeds not initialized" << std::endl;
exit(1);
}
std::cout << GridLogMessage << "Reseeding serial RNG with seed vector " << SerialSeeds << std::endl;
sRNG_.SeedFixedIntegers(SerialSeeds);
std::cout << GridLogMessage << "Reseeding parallel RNG with seed vector " << ParallelSeeds << std::endl;
pRNG_->SeedFixedIntegers(ParallelSeeds);
}
};

View File

@ -226,9 +226,6 @@ public:
//////////////////////////////////////////////////////
// Random number generators
//////////////////////////////////////////////////////
//Return true if the RNG objects have been instantiated
bool haveRNGs() const{ return have_RNG; }
void AddRNGs(std::string s = "") {
// Couple the RNGs to the GridModule tagged by s

View File

@ -136,14 +136,8 @@ protected:
if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force);
force = FieldImplementation::projectForce(force); // Ta for gauge fields
double end_force = usecond();
Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x])
Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;
Real max_force_abs = std::sqrt(maxLocalNorm2(force));
Real max_impulse_abs = max_force_abs * ep * HMC_MOMENTUM_DENOMINATOR;
std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] Force average: " << force_abs << " Max force: " << max_force_abs << " Time step: " << ep << " Impulse average: " << impulse_abs << " Max impulse: " << max_impulse_abs << std::endl;
Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites());
std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] Force average: " << force_abs << std::endl;
Mom -= force * ep* HMC_MOMENTUM_DENOMINATOR;;
double end_full = usecond();
double time_full = (end_full - start_full) / 1e3;
@ -255,19 +249,15 @@ public:
void refresh(Field& U, GridSerialRNG & sRNG, GridParallelRNG& pRNG)
{
assert(P.Grid() == U.Grid());
std::cout << GridLogIntegrator << "Integrator refresh" << std::endl;
std::cout << GridLogIntegrator << "Integrator refresh\n";
std::cout << GridLogIntegrator << "Generating momentum" << std::endl;
FieldImplementation::generate_momenta(P, sRNG, pRNG);
// Update the smeared fields, can be implemented as observer
// necessary to keep the fields updated even after a reject
// of the Metropolis
std::cout << GridLogIntegrator << "Updating smeared fields" << std::endl;
Smearer.set_Field(U);
// Set the (eventual) representations gauge fields
std::cout << GridLogIntegrator << "Updating representations" << std::endl;
Representations.update(U);
// The Smearer is attached to a pointer of the gauge field
@ -277,7 +267,6 @@ public:
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
// get gauge field from the SmearingPolicy and
// based on the boolean is_smeared in actionID
std::cout << GridLogIntegrator << "Refreshing integrator level " << level << " index " << actionID << std::endl;
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
as[level].actions.at(actionID)->refresh(Us, sRNG, pRNG);
}

View File

@ -99,7 +99,7 @@ public:
// using wilson flow by default here
WilsonFlow<PeriodicGimplR> WF(Pars.Smearing.steps, Pars.Smearing.step_size, Pars.Smearing.meas_interval);
WF.smear_adaptive(Usmear, U, Pars.Smearing.maxTau);
Real T0 = WF.energyDensityPlaquette(Pars.Smearing.maxTau, Usmear);
Real T0 = WF.energyDensityPlaquette(Usmear);
std::cout << GridLogMessage << std::setprecision(std::numeric_limits<Real>::digits10 + 1)
<< "T0 : [ " << traj << " ] "<< T0 << std::endl;
}

View File

@ -7,7 +7,6 @@ Source file: ./lib/qcd/modules/plaquette.h
Copyright (C) 2017
Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Christopher Kelly <ckelly@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -34,44 +33,28 @@ NAMESPACE_BEGIN(Grid);
template <class Gimpl>
class WilsonFlow: public Smear<Gimpl>{
public:
//Store generic measurements to take during smearing process using std::function
typedef std::function<void(int, RealD, const typename Gimpl::GaugeField &)> FunctionType; //int: step, RealD: flow time, GaugeField : the gauge field
private:
unsigned int Nstep;
RealD epsilon; //for regular smearing this is the time step, for adaptive it is the initial time step
std::vector< std::pair<int, FunctionType> > functions; //The int maps to the measurement frequency
unsigned int measure_interval;
mutable RealD epsilon, taus;
mutable WilsonGaugeAction<Gimpl> SG;
//Evolve the gauge field by 1 step and update tau
void evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const;
//Evolve the gauge field by 1 step and update tau and the current time step eps
void evolve_step_adaptive(typename Gimpl::GaugeField&U, RealD &tau, RealD &eps, RealD maxTau) const;
void evolve_step(typename Gimpl::GaugeField&) const;
void evolve_step_adaptive(typename Gimpl::GaugeField&, RealD);
RealD tau(unsigned int t)const {return epsilon*(t+1.0); }
public:
INHERIT_GIMPL_TYPES(Gimpl)
void resetActions(){ functions.clear(); }
void addMeasurement(int meas_interval, FunctionType meas){ functions.push_back({meas_interval, meas}); }
//Set the class to perform the default measurements:
//the plaquette energy density every step
//the plaquette topological charge every 'topq_meas_interval' steps
//and output to stdout
void setDefaultMeasurements(int topq_meas_interval = 1);
explicit WilsonFlow(unsigned int Nstep, RealD epsilon, unsigned int interval = 1):
Nstep(Nstep),
epsilon(epsilon),
measure_interval(interval),
SG(WilsonGaugeAction<Gimpl>(3.0)) {
// WilsonGaugeAction with beta 3.0
assert(epsilon > 0.0);
LogMessage();
setDefaultMeasurements(interval);
}
void LogMessage() {
@ -90,29 +73,9 @@ public:
// undefined for WilsonFlow
}
void smear_adaptive(GaugeField&, const GaugeField&, RealD maxTau) const;
//Compute t^2 <E(t)> for time t from the plaquette
static RealD energyDensityPlaquette(const RealD t, const GaugeField& U);
//Compute t^2 <E(t)> for time t from the 1x1 cloverleaf form
//t is the Wilson flow time
static RealD energyDensityCloverleaf(const RealD t, const GaugeField& U);
//Evolve the gauge field by Nstep steps of epsilon and return the energy density computed every interval steps
//The smeared field is output as V
std::vector<RealD> flowMeasureEnergyDensityPlaquette(GaugeField &V, const GaugeField& U, int measure_interval = 1);
//Version that does not return the smeared field
std::vector<RealD> flowMeasureEnergyDensityPlaquette(const GaugeField& U, int measure_interval = 1);
//Evolve the gauge field by Nstep steps of epsilon and return the Cloverleaf energy density computed every interval steps
//The smeared field is output as V
std::vector<RealD> flowMeasureEnergyDensityCloverleaf(GaugeField &V, const GaugeField& U, int measure_interval = 1);
//Version that does not return the smeared field
std::vector<RealD> flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval = 1);
void smear_adaptive(GaugeField&, const GaugeField&, RealD maxTau);
RealD energyDensityPlaquette(unsigned int step, const GaugeField& U) const;
RealD energyDensityPlaquette(const GaugeField& U) const;
};
@ -120,7 +83,7 @@ public:
// Implementations
////////////////////////////////////////////////////////////////////////////////
template <class Gimpl>
void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const{
void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U) const{
GaugeField Z(U.Grid());
GaugeField tmp(U.Grid());
SG.deriv(U, Z);
@ -136,13 +99,12 @@ void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U, RealD &tau) c
SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2
Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
Gimpl::update_field(Z, U, -2.0*epsilon); // V(t+e) = exp(ep*Z)*W2
tau += epsilon;
}
template <class Gimpl>
void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD &tau, RealD &eps, RealD maxTau) const{
if (maxTau - tau < eps){
eps = maxTau-tau;
void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD maxTau) {
if (maxTau - taus < epsilon){
epsilon = maxTau-taus;
}
//std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl;
GaugeField Z(U.Grid());
@ -152,151 +114,95 @@ void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, Real
SG.deriv(U, Z);
Zprime = -Z;
Z *= 0.25; // Z0 = 1/4 * F(U)
Gimpl::update_field(Z, U, -2.0*eps); // U = W1 = exp(ep*Z0)*W0
Gimpl::update_field(Z, U, -2.0*epsilon); // U = W1 = exp(ep*Z0)*W0
Z *= -17.0/8.0;
SG.deriv(U, tmp); Z += tmp; // -17/32*Z0 +Z1
Zprime += 2.0*tmp;
Z *= 8.0/9.0; // Z = -17/36*Z0 +8/9*Z1
Gimpl::update_field(Z, U, -2.0*eps); // U_= W2 = exp(ep*Z)*W1
Gimpl::update_field(Z, U, -2.0*epsilon); // U_= W2 = exp(ep*Z)*W1
Z *= -4.0/3.0;
SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2
Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
Gimpl::update_field(Z, U, -2.0*eps); // V(t+e) = exp(ep*Z)*W2
Gimpl::update_field(Z, U, -2.0*epsilon); // V(t+e) = exp(ep*Z)*W2
// Ramos
Gimpl::update_field(Zprime, Uprime, -2.0*eps); // V'(t+e) = exp(ep*Z')*W0
Gimpl::update_field(Zprime, Uprime, -2.0*epsilon); // V'(t+e) = exp(ep*Z')*W0
// Compute distance as norm^2 of the difference
GaugeField diffU = U - Uprime;
RealD diff = norm2(diffU);
// adjust integration step
tau += eps;
taus += epsilon;
//std::cout << GridLogMessage << "Adjusting integration step with distance: " << diff << std::endl;
eps = eps*0.95*std::pow(1e-4/diff,1./3.);
epsilon = epsilon*0.95*std::pow(1e-4/diff,1./3.);
//std::cout << GridLogMessage << "New epsilon : " << epsilon << std::endl;
}
template <class Gimpl>
RealD WilsonFlow<Gimpl>::energyDensityPlaquette(const RealD t, const GaugeField& U){
static WilsonGaugeAction<Gimpl> SG(3.0);
return 2.0 * t * t * SG.S(U)/U.Grid()->gSites();
}
//Compute t^2 <E(t)> for time from the 1x1 cloverleaf form
template <class Gimpl>
RealD WilsonFlow<Gimpl>::energyDensityCloverleaf(const RealD t, const GaugeField& U){
typedef typename Gimpl::GaugeLinkField GaugeMat;
typedef typename Gimpl::GaugeField GaugeLorentz;
assert(Nd == 4);
//E = 1/2 tr( F_munu F_munu )
//However as F_numu = -F_munu, only need to sum the trace of the squares of the following 6 field strengths:
//F_01 F_02 F_03 F_12 F_13 F_23
GaugeMat F(U.Grid());
LatticeComplexD R(U.Grid());
R = Zero();
for(int mu=0;mu<3;mu++){
for(int nu=mu+1;nu<4;nu++){
WilsonLoops<Gimpl>::FieldStrength(F, U, mu, nu);
R = R + trace(F*F);
}
}
ComplexD out = sum(R);
out = t*t*out / RealD(U.Grid()->gSites());
return -real(out); //minus sign necessary for +ve energy
}
template <class Gimpl>
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityPlaquette(GaugeField &V, const GaugeField& U, int measure_interval){
std::vector<RealD> out;
resetActions();
addMeasurement(measure_interval, [&out](int step, RealD t, const typename Gimpl::GaugeField &U){
std::cout << GridLogMessage << "[WilsonFlow] Computing plaquette energy density for step " << step << std::endl;
out.push_back( energyDensityPlaquette(t,U) );
});
smear(V,U);
return out;
RealD WilsonFlow<Gimpl>::energyDensityPlaquette(unsigned int step, const GaugeField& U) const {
RealD td = tau(step);
return 2.0 * td * td * SG.S(U)/U.Grid()->gSites();
}
template <class Gimpl>
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityPlaquette(const GaugeField& U, int measure_interval){
GaugeField V(U);
return flowMeasureEnergyDensityPlaquette(V,U, measure_interval);
RealD WilsonFlow<Gimpl>::energyDensityPlaquette(const GaugeField& U) const {
return 2.0 * taus * taus * SG.S(U)/U.Grid()->gSites();
}
template <class Gimpl>
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityCloverleaf(GaugeField &V, const GaugeField& U, int measure_interval){
std::vector<RealD> out;
resetActions();
addMeasurement(measure_interval, [&out](int step, RealD t, const typename Gimpl::GaugeField &U){
std::cout << GridLogMessage << "[WilsonFlow] Computing Cloverleaf energy density for step " << step << std::endl;
out.push_back( energyDensityCloverleaf(t,U) );
});
smear(V,U);
return out;
}
template <class Gimpl>
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval){
GaugeField V(U);
return flowMeasureEnergyDensityCloverleaf(V,U, measure_interval);
}
//#define WF_TIMING
template <class Gimpl>
void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const{
void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const {
out = in;
RealD taus = 0.;
for (unsigned int step = 1; step <= Nstep; step++) { //step indicates the number of smearing steps applied at the time of measurement
for (unsigned int step = 1; step <= Nstep; step++) {
auto start = std::chrono::high_resolution_clock::now();
evolve_step(out, taus);
evolve_step(out);
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> diff = end - start;
#ifdef WF_TIMING
std::cout << "Time to evolve " << diff.count() << " s\n";
#endif
//Perform measurements
for(auto const &meas : functions)
if( step % meas.first == 0 ) meas.second(step,taus,out);
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
<< step << " " << tau(step) << " "
<< energyDensityPlaquette(step,out) << std::endl;
if( step % measure_interval == 0){
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : "
<< step << " "
<< WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl;
}
}
}
template <class Gimpl>
void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau) const{
void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau){
out = in;
RealD taus = 0.;
RealD eps = epsilon;
taus = epsilon;
unsigned int step = 0;
do{
step++;
//std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl;
evolve_step_adaptive(out, taus, eps, maxTau);
//Perform measurements
for(auto const &meas : functions)
if( step % meas.first == 0 ) meas.second(step,taus,out);
evolve_step_adaptive(out, maxTau);
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
<< step << " " << taus << " "
<< energyDensityPlaquette(out) << std::endl;
if( step % measure_interval == 0){
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : "
<< step << " "
<< WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl;
}
} while (taus < maxTau);
}
template <class Gimpl>
void WilsonFlow<Gimpl>::setDefaultMeasurements(int topq_meas_interval){
addMeasurement(1, [](int step, RealD t, const typename Gimpl::GaugeField &U){
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " << step << " " << t << " " << energyDensityPlaquette(t,U) << std::endl;
});
addMeasurement(topq_meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " << WilsonLoops<Gimpl>::TopologicalCharge(U) << std::endl;
});
}
}
NAMESPACE_END(Grid);

View File

@ -88,12 +88,6 @@ namespace PeriodicBC {
return CovShiftBackward(Link,mu,arg);
}
//Boundary-aware C-shift of gauge links / gauge transformation matrices
template<class gauge> Lattice<gauge>
CshiftLink(const Lattice<gauge> &Link, int mu, int shift)
{
return Cshift(Link, mu, shift);
}
}
@ -164,9 +158,6 @@ namespace ConjugateBC {
// std::cout<<"Gparity::CovCshiftBackward mu="<<mu<<std::endl;
return Cshift(tmp,mu,-1);// moves towards positive mu
}
//Out(x) = U^dag_\mu(x-mu) | x_\mu != 0
// = U^T_\mu(L-1) | x_\mu == 0
template<class gauge> Lattice<gauge>
CovShiftIdentityBackward(const Lattice<gauge> &Link, int mu) {
GridBase *grid = Link.Grid();
@ -185,9 +176,6 @@ namespace ConjugateBC {
return Link;
}
//Out(x) = S_\mu(x+\hat\mu) | x_\mu != L-1
// = S*_\mu(0) | x_\mu == L-1
//Note: While this is used for Staples it is also applicable for shifting gauge links or gauge transformation matrices
template<class gauge> Lattice<gauge>
ShiftStaple(const Lattice<gauge> &Link, int mu)
{
@ -220,35 +208,6 @@ namespace ConjugateBC {
return CovShiftBackward(Link,mu,arg);
}
//Boundary-aware C-shift of gauge links / gauge transformation matrices
//shift = 1
//Out(x) = U_\mu(x+\hat\mu) | x_\mu != L-1
// = U*_\mu(0) | x_\mu == L-1
//shift = -1
//Out(x) = U_\mu(x-mu) | x_\mu != 0
// = U*_\mu(L-1) | x_\mu == 0
template<class gauge> Lattice<gauge>
CshiftLink(const Lattice<gauge> &Link, int mu, int shift)
{
GridBase *grid = Link.Grid();
int Lmu = grid->GlobalDimensions()[mu] - 1;
Lattice<iScalar<vInteger>> coor(grid);
LatticeCoordinate(coor, mu);
Lattice<gauge> tmp(grid);
if(shift == 1){
tmp = Cshift(Link, mu, 1);
tmp = where(coor == Lmu, conjugate(tmp), tmp);
return tmp;
}else if(shift == -1){
tmp = Link;
tmp = where(coor == Lmu, conjugate(tmp), tmp);
return Cshift(tmp, mu, -1);
}else assert(0 && "Invalid shift value");
return tmp; //shuts up the compiler fussing about the return type
}
}

View File

@ -40,46 +40,27 @@ public:
typedef typename Gimpl::GaugeLinkField GaugeMat;
typedef typename Gimpl::GaugeField GaugeLorentz;
//A_\mu(x) = -i Ta(U_\mu(x) ) where Ta(U) = 1/2( U - U^dag ) - 1/2N tr(U - U^dag) is the traceless antihermitian part. This is an O(A^3) approximation to the logarithm of U
static void GaugeLinkToLieAlgebraField(const GaugeMat &U, GaugeMat &A) {
Complex cmi(0.0,-1.0);
A = Ta(U) * cmi;
static void GaugeLinkToLieAlgebraField(const std::vector<GaugeMat> &U,std::vector<GaugeMat> &A) {
for(int mu=0;mu<Nd;mu++){
Complex cmi(0.0,-1.0);
A[mu] = Ta(U[mu]) * cmi;
}
}
//The derivative of the Lie algebra field
static void DmuAmu(const std::vector<GaugeMat> &U, GaugeMat &dmuAmu,int orthog) {
GridBase* grid = U[0].Grid();
GaugeMat Ax(grid);
GaugeMat Axm1(grid);
GaugeMat Utmp(grid);
static void DmuAmu(const std::vector<GaugeMat> &A,GaugeMat &dmuAmu,int orthog) {
dmuAmu=Zero();
for(int mu=0;mu<Nd;mu++){
if ( mu != orthog ) {
//Rather than define functionality to work out how the BCs apply to A_\mu we simply use the BC-aware Cshift to the gauge links and compute A_\mu(x) and A_\mu(x-1) separately
//Ax = A_\mu(x)
GaugeLinkToLieAlgebraField(U[mu], Ax);
//Axm1 = A_\mu(x_\mu-1)
Utmp = Gimpl::CshiftLink(U[mu], mu, -1);
GaugeLinkToLieAlgebraField(Utmp, Axm1);
//Derivative
dmuAmu = dmuAmu + Ax - Axm1;
dmuAmu = dmuAmu + A[mu] - Cshift(A[mu],mu,-1);
}
}
}
//Fix the gauge field Umu
//0 < alpha < 1 is related to the step size, cf https://arxiv.org/pdf/1405.5812.pdf
static void SteepestDescentGaugeFix(GaugeLorentz &Umu, Real alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1) {
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1,bool err_on_no_converge=true) {
GridBase *grid = Umu.Grid();
GaugeMat xform(grid);
SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier,orthog);
SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier,orthog,err_on_no_converge);
}
//Fix the gauge field Umu and also return the gauge transformation from the original gauge field, xform
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,GaugeMat &xform, Real alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1) {
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,GaugeMat &xform,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1,bool err_on_no_converge=true) {
GridBase *grid = Umu.Grid();
@ -141,24 +122,29 @@ public:
}
}
assert(0 && "Gauge fixing did not converge within the specified number of iterations");
std::cout << GridLogError << "Gauge fixing did not converge in " << maxiter << " iterations." << std::endl;
if (err_on_no_converge) assert(0);
};
static Real SteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform, Real alpha, GaugeMat & dmuAmu,int orthog) {
static Real SteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform,Real & alpha, GaugeMat & dmuAmu,int orthog) {
GridBase *grid = U[0].Grid();
std::vector<GaugeMat> A(Nd,grid);
GaugeMat g(grid);
ExpiAlphaDmuAmu(U,g,alpha,dmuAmu,orthog);
GaugeLinkToLieAlgebraField(U,A);
ExpiAlphaDmuAmu(A,g,alpha,dmuAmu,orthog);
Real vol = grid->gSites();
Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
xform = g*xform ;
SU<Nc>::GaugeTransform<Gimpl>(U,g);
SU<Nc>::GaugeTransform(U,g);
return trG;
}
static Real FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform, Real alpha, GaugeMat & dmuAmu,int orthog) {
static Real FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform,Real & alpha, GaugeMat & dmuAmu,int orthog) {
GridBase *grid = U[0].Grid();
@ -173,7 +159,11 @@ public:
GaugeMat g(grid);
GaugeMat dmuAmu_p(grid);
DmuAmu(U,dmuAmu,orthog);
std::vector<GaugeMat> A(Nd,grid);
GaugeLinkToLieAlgebraField(U,A);
DmuAmu(A,dmuAmu,orthog);
std::vector<int> mask(Nd,1);
for(int mu=0;mu<Nd;mu++) if (mu==orthog) mask[mu]=0;
@ -217,16 +207,16 @@ public:
Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
xform = g*xform ;
SU<Nc>::GaugeTransform<Gimpl>(U,g);
SU<Nc>::GaugeTransform(U,g);
return trG;
}
static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &U,GaugeMat &g, Real alpha, GaugeMat &dmuAmu,int orthog) {
static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu,int orthog) {
GridBase *grid = g.Grid();
Complex cialpha(0.0,-alpha);
GaugeMat ciadmam(grid);
DmuAmu(U,dmuAmu,orthog);
DmuAmu(A,dmuAmu,orthog);
ciadmam = dmuAmu*cialpha;
SU<Nc>::taExp(ciadmam,g);
}

View File

@ -694,32 +694,32 @@ public:
* Adjoint rep gauge xform
*/
template<typename Gimpl>
static void GaugeTransform(typename Gimpl::GaugeField &Umu, typename Gimpl::GaugeLinkField &g){
template<typename GaugeField,typename GaugeMat>
static void GaugeTransform( GaugeField &Umu, GaugeMat &g){
GridBase *grid = Umu.Grid();
conformable(grid,g.Grid());
typename Gimpl::GaugeLinkField U(grid);
typename Gimpl::GaugeLinkField ag(grid); ag = adj(g);
GaugeMat U(grid);
GaugeMat ag(grid); ag = adj(g);
for(int mu=0;mu<Nd;mu++){
U= PeekIndex<LorentzIndex>(Umu,mu);
U = g*U*Gimpl::CshiftLink(ag, mu, 1); //BC-aware
U = g*U*Cshift(ag, mu, 1);
PokeIndex<LorentzIndex>(Umu,U,mu);
}
}
template<typename Gimpl>
static void GaugeTransform( std::vector<typename Gimpl::GaugeLinkField> &U, typename Gimpl::GaugeLinkField &g){
template<typename GaugeMat>
static void GaugeTransform( std::vector<GaugeMat> &U, GaugeMat &g){
GridBase *grid = g.Grid();
typename Gimpl::GaugeLinkField ag(grid); ag = adj(g);
GaugeMat ag(grid); ag = adj(g);
for(int mu=0;mu<Nd;mu++){
U[mu] = g*U[mu]*Gimpl::CshiftLink(ag, mu, 1); //BC-aware
U[mu] = g*U[mu]*Cshift(ag, mu, 1);
}
}
template<typename Gimpl>
static void RandomGaugeTransform(GridParallelRNG &pRNG, typename Gimpl::GaugeField &Umu, typename Gimpl::GaugeLinkField &g){
template<typename GaugeField,typename GaugeMat>
static void RandomGaugeTransform(GridParallelRNG &pRNG, GaugeField &Umu, GaugeMat &g){
LieRandomize(pRNG,g,1.0);
GaugeTransform<Gimpl>(Umu,g);
GaugeTransform(Umu,g);
}
// Projects the algebra components a lattice matrix (of dimension ncol*ncol -1 )

View File

@ -125,57 +125,6 @@ public:
return sumplaq / vol / faces / Nc; // Nd , Nc dependent... FIXME
}
//////////////////////////////////////////////////
// sum over all spatial planes of plaquette
//////////////////////////////////////////////////
static void siteSpatialPlaquette(ComplexField &Plaq,
const std::vector<GaugeMat> &U) {
ComplexField sitePlaq(U[0].Grid());
Plaq = Zero();
for (int mu = 1; mu < Nd-1; mu++) {
for (int nu = 0; nu < mu; nu++) {
traceDirPlaquette(sitePlaq, U, mu, nu);
Plaq = Plaq + sitePlaq;
}
}
}
////////////////////////////////////
// sum over all x,y,z and over all spatial planes of plaquette
//////////////////////////////////////////////////
static std::vector<RealD> timesliceSumSpatialPlaquette(const GaugeLorentz &Umu) {
std::vector<GaugeMat> U(Nd, Umu.Grid());
// inefficient here
for (int mu = 0; mu < Nd; mu++) {
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
}
ComplexField Plaq(Umu.Grid());
siteSpatialPlaquette(Plaq, U);
typedef typename ComplexField::scalar_object sobj;
std::vector<sobj> Tq;
sliceSum(Plaq, Tq, Nd-1);
std::vector<Real> out(Tq.size());
for(int t=0;t<Tq.size();t++) out[t] = TensorRemove(Tq[t]).real();
return out;
}
//////////////////////////////////////////////////
// average over all x,y,z and over all spatial planes of plaquette
//////////////////////////////////////////////////
static std::vector<RealD> timesliceAvgSpatialPlaquette(const GaugeLorentz &Umu) {
std::vector<RealD> sumplaq = timesliceSumSpatialPlaquette(Umu);
int Lt = Umu.Grid()->FullDimensions()[Nd-1];
assert(sumplaq.size() == Lt);
double vol = Umu.Grid()->gSites() / Lt;
double faces = (1.0 * (Nd - 1)* (Nd - 2)) / 2.0;
for(int t=0;t<Lt;t++)
sumplaq[t] = sumplaq[t] / vol / faces / Nc; // Nd , Nc dependent... FIXME
return sumplaq;
}
//////////////////////////////////////////////////
// average over all x,y,z the temporal loop
//////////////////////////////////////////////////
@ -215,7 +164,7 @@ public:
double vol = Umu.Grid()->gSites();
return p.real() / vol / 4.0 / 3.0;
return p.real() / vol / (4.0 * Nc ) ;
};
//////////////////////////////////////////////////
@ -413,11 +362,11 @@ public:
GaugeMat u = PeekIndex<LorentzIndex>(Umu, mu); // some redundant copies
GaugeMat vu = v*u;
//FS = 0.25*Ta(u*v + Cshift(vu, mu, -1));
FS = (u*v + Gimpl::CshiftLink(vu, mu, -1));
FS = (u*v + Cshift(vu, mu, -1));
FS = 0.125*(FS - adj(FS));
}
static Real TopologicalCharge(const GaugeLorentz &U){
static Real TopologicalCharge(GaugeLorentz &U){
// 4d topological charge
assert(Nd==4);
// Bx = -iF(y,z), By = -iF(z,y), Bz = -iF(x,y)
@ -440,203 +389,6 @@ public:
}
//Clover-leaf Wilson loop combination for arbitrary mu-extent M and nu extent N, mu >= nu
//cf https://arxiv.org/pdf/hep-lat/9701012.pdf Eq 7 for 1x2 Wilson loop
//Clockwise ordering
static void CloverleafMxN(GaugeMat &FS, const GaugeMat &Umu, const GaugeMat &Unu, int mu, int nu, int M, int N){
#define Fmu(A) Gimpl::CovShiftForward(Umu, mu, A)
#define Bmu(A) Gimpl::CovShiftBackward(Umu, mu, A)
#define Fnu(A) Gimpl::CovShiftForward(Unu, nu, A)
#define Bnu(A) Gimpl::CovShiftBackward(Unu, nu, A)
#define FmuI Gimpl::CovShiftIdentityForward(Umu, mu)
#define BmuI Gimpl::CovShiftIdentityBackward(Umu, mu)
#define FnuI Gimpl::CovShiftIdentityForward(Unu, nu)
#define BnuI Gimpl::CovShiftIdentityBackward(Unu, nu)
//Upper right loop
GaugeMat tmp = BmuI;
for(int i=1;i<M;i++)
tmp = Bmu(tmp);
for(int j=0;j<N;j++)
tmp = Bnu(tmp);
for(int i=0;i<M;i++)
tmp = Fmu(tmp);
for(int j=0;j<N;j++)
tmp = Fnu(tmp);
FS = tmp;
//Upper left loop
tmp = BnuI;
for(int j=1;j<N;j++)
tmp = Bnu(tmp);
for(int i=0;i<M;i++)
tmp = Fmu(tmp);
for(int j=0;j<N;j++)
tmp = Fnu(tmp);
for(int i=0;i<M;i++)
tmp = Bmu(tmp);
FS = FS + tmp;
//Lower right loop
tmp = FnuI;
for(int j=1;j<N;j++)
tmp = Fnu(tmp);
for(int i=0;i<M;i++)
tmp = Bmu(tmp);
for(int j=0;j<N;j++)
tmp = Bnu(tmp);
for(int i=0;i<M;i++)
tmp = Fmu(tmp);
FS = FS + tmp;
//Lower left loop
tmp = FmuI;
for(int i=1;i<M;i++)
tmp = Fmu(tmp);
for(int j=0;j<N;j++)
tmp = Fnu(tmp);
for(int i=0;i<M;i++)
tmp = Bmu(tmp);
for(int j=0;j<N;j++)
tmp = Bnu(tmp);
FS = FS + tmp;
#undef Fmu
#undef Bmu
#undef Fnu
#undef Bnu
#undef FmuI
#undef BmuI
#undef FnuI
#undef BnuI
}
//Field strength from MxN Wilson loop
//Note F_numu = - F_munu
static void FieldStrengthMxN(GaugeMat &FS, const GaugeLorentz &U, int mu, int nu, int M, int N){
GaugeMat Umu = PeekIndex<LorentzIndex>(U, mu);
GaugeMat Unu = PeekIndex<LorentzIndex>(U, nu);
if(M == N){
GaugeMat F(Umu.Grid());
CloverleafMxN(F, Umu, Unu, mu, nu, M, N);
FS = 0.125 * ( F - adj(F) );
}else{
//Average over both orientations
GaugeMat horizontal(Umu.Grid()), vertical(Umu.Grid());
CloverleafMxN(horizontal, Umu, Unu, mu, nu, M, N);
CloverleafMxN(vertical, Umu, Unu, mu, nu, N, M);
FS = 0.0625 * ( horizontal - adj(horizontal) + vertical - adj(vertical) );
}
}
//Topological charge contribution from MxN Wilson loops
//cf https://arxiv.org/pdf/hep-lat/9701012.pdf Eq 6
//output is the charge by timeslice: sum over timeslices to obtain the total
static std::vector<Real> TimesliceTopologicalChargeMxN(const GaugeLorentz &U, int M, int N){
assert(Nd == 4);
std::vector<std::vector<GaugeMat*> > F(Nd,std::vector<GaugeMat*>(Nd,nullptr));
//Note F_numu = - F_munu
//hence we only need to loop over mu,nu,rho,sigma that aren't related by permuting mu,nu or rho,sigma
//Use nu > mu
for(int mu=0;mu<Nd-1;mu++){
for(int nu=mu+1; nu<Nd; nu++){
F[mu][nu] = new GaugeMat(U.Grid());
FieldStrengthMxN(*F[mu][nu], U, mu, nu, M, N);
}
}
Real coeff = -1./(32 * M_PI*M_PI * M*M * N*N); //overall sign to match CPS and Grid conventions, possibly related to time direction = 3 vs 0
static const int combs[3][4] = { {0,1,2,3}, {0,2,1,3}, {0,3,1,2} };
static const int signs[3] = { 1, -1, 1 }; //epsilon_{mu nu rho sigma}
ComplexField fsum(U.Grid());
fsum = Zero();
for(int c=0;c<3;c++){
int mu = combs[c][0], nu = combs[c][1], rho = combs[c][2], sigma = combs[c][3];
int eps = signs[c];
fsum = fsum + (8. * coeff * eps) * trace( (*F[mu][nu]) * (*F[rho][sigma]) );
}
for(int mu=0;mu<Nd-1;mu++)
for(int nu=mu+1; nu<Nd; nu++)
delete F[mu][nu];
typedef typename ComplexField::scalar_object sobj;
std::vector<sobj> Tq;
sliceSum(fsum, Tq, Nd-1);
std::vector<Real> out(Tq.size());
for(int t=0;t<Tq.size();t++) out[t] = TensorRemove(Tq[t]).real();
return out;
}
static Real TopologicalChargeMxN(const GaugeLorentz &U, int M, int N){
std::vector<Real> Tq = TimesliceTopologicalChargeMxN(U,M,N);
Real out(0);
for(int t=0;t<Tq.size();t++) out += Tq[t];
return out;
}
//Generate the contributions to the 5Li topological charge from Wilson loops of the following sizes
//Use coefficients from hep-lat/9701012
//1x1 : c1=(19.-55.*c5)/9.
//2x2 : c2=(1-64.*c5)/9.
//1x2 : c3=(-64.+640.*c5)/45.
//1x3 : c4=1./5.-2.*c5
//3x3 : c5=1./20.
//Output array outer index contains the loops in the above order
//Inner index is the time coordinate
static std::vector<std::vector<Real> > TimesliceTopologicalCharge5LiContributions(const GaugeLorentz &U){
static const int exts[5][2] = { {1,1}, {2,2}, {1,2}, {1,3}, {3,3} };
std::vector<std::vector<Real> > out(5);
for(int i=0;i<5;i++){
out[i] = TimesliceTopologicalChargeMxN(U,exts[i][0],exts[i][1]);
}
return out;
}
static std::vector<Real> TopologicalCharge5LiContributions(const GaugeLorentz &U){
static const int exts[5][2] = { {1,1}, {2,2}, {1,2}, {1,3}, {3,3} };
std::vector<Real> out(5);
std::cout << GridLogMessage << "Computing topological charge" << std::endl;
for(int i=0;i<5;i++){
out[i] = TopologicalChargeMxN(U,exts[i][0],exts[i][1]);
std::cout << GridLogMessage << exts[i][0] << "x" << exts[i][1] << " Wilson loop contribution " << out[i] << std::endl;
}
return out;
}
//Compute the 5Li topological charge
static std::vector<Real> TimesliceTopologicalCharge5Li(const GaugeLorentz &U){
std::vector<std::vector<Real> > loops = TimesliceTopologicalCharge5LiContributions(U);
double c5=1./20.;
double c4=1./5.-2.*c5;
double c3=(-64.+640.*c5)/45.;
double c2=(1-64.*c5)/9.;
double c1=(19.-55.*c5)/9.;
int Lt = loops[0].size();
std::vector<Real> out(Lt,0.);
for(int t=0;t<Lt;t++)
out[t] += c1*loops[0][t] + c2*loops[1][t] + c3*loops[2][t] + c4*loops[3][t] + c5*loops[4][t];
return out;
}
static Real TopologicalCharge5Li(const GaugeLorentz &U){
std::vector<Real> Qt = TimesliceTopologicalCharge5Li(U);
Real Q = 0.;
for(int t=0;t<Qt.size();t++) Q += Qt[t];
std::cout << GridLogMessage << "5Li Topological charge: " << Q << std::endl;
return Q;
}
//////////////////////////////////////////////////////
// Similar to above for rectangle is required
//////////////////////////////////////////////////////

View File

@ -1,200 +0,0 @@
// -*- C++ -*-
//===--------------------------- random -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Peter Boyle: Taken from libc++ in Clang/LLVM.
// Reason is that libstdc++ and clang differ in their return order in the normal_distribution / box mueller type step.
// standardise on one and call it "gaussian_distribution".
#pragma once
#include <cstddef>
#include <cstdint>
#include <cmath>
#include <type_traits>
#include <initializer_list>
#include <limits>
#include <algorithm>
#include <numeric>
#include <vector>
#include <string>
#include <istream>
#include <ostream>
#include <random>
// normal_distribution -> gaussian distribution
namespace Grid {
template<class _RealType = double>
class gaussian_distribution
{
public:
// types
typedef _RealType result_type;
class param_type
{
result_type __mean_;
result_type __stddev_;
public:
typedef gaussian_distribution distribution_type;
strong_inline
explicit param_type(result_type __mean = 0, result_type __stddev = 1)
: __mean_(__mean), __stddev_(__stddev) {}
strong_inline
result_type mean() const {return __mean_;}
strong_inline
result_type stddev() const {return __stddev_;}
friend strong_inline
bool operator==(const param_type& __x, const param_type& __y)
{return __x.__mean_ == __y.__mean_ && __x.__stddev_ == __y.__stddev_;}
friend strong_inline
bool operator!=(const param_type& __x, const param_type& __y)
{return !(__x == __y);}
};
private:
param_type __p_;
result_type _V_;
bool _V_hot_;
public:
// constructors and reset functions
strong_inline
explicit gaussian_distribution(result_type __mean = 0, result_type __stddev = 1)
: __p_(param_type(__mean, __stddev)), _V_hot_(false) {}
strong_inline
explicit gaussian_distribution(const param_type& __p)
: __p_(__p), _V_hot_(false) {}
strong_inline
void reset() {_V_hot_ = false;}
// generating functions
template<class _URNG>
strong_inline
result_type operator()(_URNG& __g)
{return (*this)(__g, __p_);}
template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
// property functions
strong_inline
result_type mean() const {return __p_.mean();}
strong_inline
result_type stddev() const {return __p_.stddev();}
strong_inline
param_type param() const {return __p_;}
strong_inline
void param(const param_type& __p) {__p_ = __p;}
strong_inline
result_type min() const {return -std::numeric_limits<result_type>::infinity();}
strong_inline
result_type max() const {return std::numeric_limits<result_type>::infinity();}
friend strong_inline
bool operator==(const gaussian_distribution& __x,
const gaussian_distribution& __y)
{return __x.__p_ == __y.__p_ && __x._V_hot_ == __y._V_hot_ &&
(!__x._V_hot_ || __x._V_ == __y._V_);}
friend strong_inline
bool operator!=(const gaussian_distribution& __x,
const gaussian_distribution& __y)
{return !(__x == __y);}
template <class _CharT, class _Traits, class _RT>
friend
std::basic_ostream<_CharT, _Traits>&
operator<<(std::basic_ostream<_CharT, _Traits>& __os,
const gaussian_distribution<_RT>& __x);
template <class _CharT, class _Traits, class _RT>
friend
std::basic_istream<_CharT, _Traits>&
operator>>(std::basic_istream<_CharT, _Traits>& __is,
gaussian_distribution<_RT>& __x);
};
template <class _RealType>
template<class _URNG>
_RealType
gaussian_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
{
result_type _Up;
if (_V_hot_)
{
_V_hot_ = false;
_Up = _V_;
}
else
{
std::uniform_real_distribution<result_type> _Uni(-1, 1);
result_type __u;
result_type __v;
result_type __s;
do
{
__u = _Uni(__g);
__v = _Uni(__g);
__s = __u * __u + __v * __v;
} while (__s > 1 || __s == 0);
result_type _Fp = std::sqrt(-2 * std::log(__s) / __s);
_V_ = __v * _Fp;
_V_hot_ = true;
_Up = __u * _Fp;
}
return _Up * __p.stddev() + __p.mean();
}
template <class _CharT, class _Traits, class _RT>
std::basic_ostream<_CharT, _Traits>&
operator<<(std::basic_ostream<_CharT, _Traits>& __os,
const gaussian_distribution<_RT>& __x)
{
auto __save_flags = __os.flags();
__os.flags(std::ios_base::dec | std::ios_base::left | std::ios_base::fixed |
std::ios_base::scientific);
_CharT __sp = __os.widen(' ');
__os.fill(__sp);
__os << __x.mean() << __sp << __x.stddev() << __sp << __x._V_hot_;
if (__x._V_hot_)
__os << __sp << __x._V_;
__os.flags(__save_flags);
return __os;
}
template <class _CharT, class _Traits, class _RT>
std::basic_istream<_CharT, _Traits>&
operator>>(std::basic_istream<_CharT, _Traits>& __is,
gaussian_distribution<_RT>& __x)
{
typedef gaussian_distribution<_RT> _Eng;
typedef typename _Eng::result_type result_type;
typedef typename _Eng::param_type param_type;
auto __save_flags = __is.flags();
__is.flags(std::ios_base::dec | std::ios_base::skipws);
result_type __mean;
result_type __stddev;
result_type _Vp = 0;
bool _V_hot = false;
__is >> __mean >> __stddev >> _V_hot;
if (_V_hot)
__is >> _Vp;
if (!__is.fail())
{
__x.param(param_type(__mean, __stddev));
__x._V_hot_ = _V_hot;
__x._V_ = _Vp;
}
__is.flags(__save_flags);
return __is;
}
}

Some files were not shown because too many files have changed in this diff Show More