1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-14 05:07:05 +01:00

Compare commits

..

59 Commits

Author SHA1 Message Date
8b91b61b61 First cut at faster GPU slice sum via atomics 2022-12-22 15:13:45 -05:00
4ca1bf7cca Added gauge invariance test 2022-12-21 07:23:16 -05:00
2ff868f7a5 CPU open doesn't need to free space 2022-12-20 05:10:23 -05:00
ede02b6883 Memory manager debug Felix case 2022-12-20 05:10:23 -05:00
1822ced302 Bug fix 2022-12-20 05:10:23 -05:00
37ba32776f More logging 2022-12-20 05:10:23 -05:00
99b3697b03 More loggin 2022-12-20 05:10:23 -05:00
43a45ec97b SSC_START 2022-12-20 05:10:23 -05:00
b00a4142e5 A=A fix 2022-12-20 05:10:23 -05:00
3791bc527b Logging pulled in from dirichlet branch 2022-12-20 05:10:23 -05:00
d8c29f5fcf Updated FFT test for PETSc 2022-12-18 12:05:00 -05:00
281f8101fe Matt FFT test 2022-12-17 20:35:33 -05:00
07acfe89f2 Merge pull request #417 from rrhodgson/feature/fermtoprop
Feature/fermtoprop
2022-12-06 12:45:03 -05:00
40234f531f FermToProp accelerator_for -> thread_for 2022-12-06 17:34:51 +00:00
d49694f38f PropToFerm fix 2022-12-06 15:48:54 +00:00
97a098636d FermToProp 2022-11-30 15:36:35 -05:00
e13930c8b2 Faster fermtoprop case 2022-11-30 15:11:29 -05:00
0655dab466 Open MP on host enabled 2022-11-08 13:38:54 -08:00
7f097bcc28 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2022-11-08 13:23:40 -08:00
5c75aa5008 Device mem 2022-11-08 13:22:57 -08:00
1873101362 PVC 2022-11-08 13:22:45 -08:00
63fd1dfa62 Config on PVC 2022-11-08 13:22:09 -08:00
bd68861b28 SYCL sum 2022-11-08 12:49:26 -08:00
82e959f66c SYCL reduction 2022-11-08 12:45:25 -08:00
62e52de06d Merge pull request #414 from fjosw/feat/eCloverGPU
Compact Exponential Cloverterm on GPU
2022-11-01 09:15:44 -04:00
184adeedb8 feat: renamed open_boundaries to fixedBoundaries 2022-10-26 12:53:46 +01:00
5fa6a8b96d docs: CompactClover debug info generalized. 2022-10-26 12:41:14 +01:00
a2a879b668 docs: CompactClover Debug Info improved. 2022-10-25 17:20:42 +01:00
9317d893b2 docs: details about inversion of CompactClover term added. 2022-10-25 17:10:06 +01:00
86075fdd45 feat: MassTerm and ExponentiateClover merged into InstantiateClover 2022-10-25 17:05:34 +01:00
b36442e263 feat: CloverHelpers::InvertClover implemented which handles the
inversion of the Clover term depending on clover type and the boundary
conditions.
2022-10-25 16:57:01 +01:00
513d797ea6 fix: signature of CompactWilsonCloverHelpers::Exponentiate fixed. 2022-10-25 16:17:22 +01:00
9e4835a3e3 feat: changed CompactWilsonExpClover exponentiation to Taylor expansion
with Horner scheme.
2022-10-25 15:19:43 +01:00
477ebf24f4 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2022-10-04 11:19:43 -07:00
0d5639f707 Run script update 2022-10-04 11:13:41 -07:00
413312f9a9 Benchmark the halo construction.
THe bye counts are out and should be doubled for SIMD directions
2022-10-04 11:12:59 -07:00
03508448f8 Remove verbose 2022-10-04 11:12:15 -07:00
e1e5c75023 Stencil gather improvements - SVM was running slow and used for a pointer array that wasn't needed to be in SVM 2022-10-04 11:11:10 -07:00
9296299b61 Better commenting 2022-10-04 11:10:34 -07:00
913fbca74a Merge pull request #410 from gkanwar/photon_and_sha_patches
Photon.h and SHA256 patches
2022-08-31 18:01:45 -04:00
60dfb49afa Remove FP16 tests when FP16 is disabled 2022-08-21 17:29:55 +02:00
554c238359 Update OpenSSL digest to use high-level methods
This avoids deprecation warnings when compiling against OpenSSL 3.0
but should still be backwards compatible. It is the recommended way
to use the digest API going forward.
2022-08-21 17:28:57 +02:00
f922adf05e Fix Photon ComplexField type 2022-08-21 16:16:18 +02:00
188d2c7a4d PVC default, ignore ATS 2022-08-02 08:38:53 -07:00
17d7177105 Files for SYCL 2022-08-02 08:33:39 -07:00
bb0a0da47a inon blocking caution due to SYCL 2022-08-02 08:09:43 -07:00
84110166e4 Fix the fence 2022-08-02 08:00:43 -07:00
d32b923b6c Fencing on a stream in SYCL is needed. Didn't know that ... gulp 2022-08-02 07:58:04 -07:00
2ab1af5754 Ensure no synchronize and not optoin dependent 2022-07-19 09:51:06 -07:00
5f8892bf03 Mistake pointed out by Camilo 2022-07-19 09:31:51 -07:00
f14e7e51e7 Grid accelerator 2022-07-12 10:56:22 -07:00
042ab1a052 Update GridStd.h 2022-06-27 13:21:39 -04:00
2df98a99bc Merge pull request #406 from giordano/patch-1
Update default value of gen-simd-width in README
2022-06-14 17:46:25 -04:00
315ea18be2 Update default value of gen-simd-width in README 2022-06-14 22:41:05 +01:00
a9c2e1df03 Merge pull request #404 from rrhodgson/feature/json_nvcc
Feature/json nvcc
2022-05-25 13:30:11 -04:00
da4daea57a Updated json to latest release 3.10.5 2022-05-24 16:16:06 +01:00
e346154c5d Updated json CUDA compile guards 2022-05-24 15:48:01 +01:00
3ca0de1c40 Fix json write for vector<string> 2022-05-24 14:37:33 +01:00
c7205d2a73 Removed nvcc guards for json 2022-05-24 14:30:26 +01:00
151 changed files with 16361 additions and 21517 deletions

View File

@ -36,7 +36,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/GridCore.h>
#include <Grid/qcd/QCD.h>
#include <Grid/qcd/spin/Spin.h>
#include <Grid/qcd/gparity/Gparity.h>
#include <Grid/qcd/utils/Utils.h>
#include <Grid/qcd/representations/Representations.h>
NAMESPACE_CHECK(GridQCDCore);

View File

@ -54,7 +54,6 @@ NAMESPACE_CHECK(BiCGSTAB);
#include <Grid/algorithms/iterative/SchurRedBlack.h>
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
#include <Grid/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h>
#include <Grid/algorithms/iterative/BiCGSTABMixedPrec.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>

View File

@ -262,7 +262,7 @@ public:
autoView( Tnp_v , (*Tnp), AcceleratorWrite);
autoView( Tnm_v , (*Tnm), AcceleratorWrite);
const int Nsimd = CComplex::Nsimd();
accelerator_forNB(ss, FineGrid->oSites(), Nsimd, {
accelerator_for(ss, FineGrid->oSites(), Nsimd, {
coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
});

View File

@ -264,7 +264,7 @@ public:
auto Tnp_v = Tnp->View();
auto Tnm_v = Tnm->View();
constexpr int Nsimd = vector_type::Nsimd();
accelerator_forNB(ss, in.Grid()->oSites(), Nsimd, {
accelerator_for(ss, in.Grid()->oSites(), Nsimd, {
coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
});

View File

@ -120,9 +120,6 @@ public:
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
GridStopWatch IterationTimer;
IterationTimer.Start();
c = cp;
MatrixTimer.Start();
@ -155,14 +152,8 @@ public:
LinearCombTimer.Stop();
LinalgTimer.Stop();
IterationTimer.Stop();
if ( (k % 500) == 0 ) {
std::cout << GridLogMessage << "ConjugateGradient: Iteration " << k
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
<< " residual " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
} else {
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
<< " residual " << sqrt(cp/ssq) << " target " << Tolerance << " took " << IterationTimer.Elapsed() << std::endl;
}
// Stopping condition
if (cp <= rsq) {
@ -179,13 +170,13 @@ public:
<< "\tTrue residual " << true_residual
<< "\tTarget " << Tolerance << std::endl;
std::cout << GridLogMessage << "Time breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInner " << InnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
std::cout << GridLogIterative << "Time breakdown "<<std::endl;
std::cout << GridLogIterative << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogIterative << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogIterative << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
std::cout << GridLogIterative << "\tInner " << InnerTimer.Elapsed() <<std::endl;
std::cout << GridLogIterative << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
std::cout << GridLogIterative << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);

View File

@ -49,7 +49,6 @@ NAMESPACE_BEGIN(Grid);
Integer TotalInnerIterations; //Number of inner CG iterations
Integer TotalOuterIterations; //Number of restarts
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
RealD TrueResidual;
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
LinearFunction<FieldF> *guesser;
@ -69,7 +68,6 @@ NAMESPACE_BEGIN(Grid);
}
void operator() (const FieldD &src_d_in, FieldD &sol_d){
std::cout << GridLogMessage << "MixedPrecisionConjugateGradient: Starting mixed precision CG with outer tolerance " << Tolerance << " and inner tolerance " << InnerTolerance << std::endl;
TotalInnerIterations = 0;
GridStopWatch TotalTimer;
@ -99,7 +97,6 @@ NAMESPACE_BEGIN(Grid);
FieldF sol_f(SinglePrecGrid);
sol_f.Checkerboard() = cb;
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting initial inner CG with tolerance " << inner_tol << std::endl;
ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations);
CG_f.ErrorOnNoConverge = false;
@ -133,7 +130,6 @@ NAMESPACE_BEGIN(Grid);
(*guesser)(src_f, sol_f);
//Inner CG
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " << outer_iter << " starting inner CG with tolerance " << inner_tol << std::endl;
CG_f.Tolerance = inner_tol;
InnerCGtimer.Start();
CG_f(Linop_f, src_f, sol_f);
@ -154,7 +150,6 @@ NAMESPACE_BEGIN(Grid);
ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations);
CG_d(Linop_d, src_d_in, sol_d);
TotalFinalStepIterations = CG_d.IterationsToComplete;
TrueResidual = CG_d.TrueResidual;
TotalTimer.Stop();
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl;

View File

@ -44,7 +44,7 @@ public:
using OperatorFunction<Field>::operator();
// RealD Tolerance;
RealD Tolerance;
Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
std::vector<int> IterationsToCompleteShift; // Iterations for this shift
@ -52,7 +52,7 @@ public:
MultiShiftFunction shifts;
std::vector<RealD> TrueResidualShift;
ConjugateGradientMultiShift(Integer maxit, const MultiShiftFunction &_shifts) :
ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) :
MaxIterations(maxit),
shifts(_shifts)
{
@ -182,9 +182,6 @@ public:
for(int s=0;s<nshift;s++) {
axpby(psi[s],0.,-bs[s]*alpha[s],src,src);
}
std::cout << GridLogIterative << "ConjugateGradientMultiShift: initial rn (|src|^2) =" << rn << " qq (|MdagM src|^2) =" << qq << " d ( dot(src, [MdagM + m_0]src) ) =" << d << " c=" << c << std::endl;
///////////////////////////////////////
// Timers
@ -324,8 +321,8 @@ public:
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMarix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tShift " << ShiftTimer.Elapsed() <<std::endl;
IterationsToComplete = k;

View File

@ -1,409 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShift.h
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Christopher Kelly <ckelly@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H
#define GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H
NAMESPACE_BEGIN(Grid);
//CK 2020: A variant of the multi-shift conjugate gradient with the matrix multiplication in single precision.
//The residual is stored in single precision, but the search directions and solution are stored in double precision.
//Every update_freq iterations the residual is corrected in double precision.
//For safety the a final regular CG is applied to clean up if necessary
//Linop to add shift to input linop, used in cleanup CG
namespace ConjugateGradientMultiShiftMixedPrecSupport{
template<typename Field>
class ShiftedLinop: public LinearOperatorBase<Field>{
public:
LinearOperatorBase<Field> &linop_base;
RealD shift;
ShiftedLinop(LinearOperatorBase<Field> &_linop_base, RealD _shift): linop_base(_linop_base), shift(_shift){}
void OpDiag (const Field &in, Field &out){ assert(0); }
void OpDir (const Field &in, Field &out,int dir,int disp){ assert(0); }
void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); }
void Op (const Field &in, Field &out){ assert(0); }
void AdjOp (const Field &in, Field &out){ assert(0); }
void HermOp(const Field &in, Field &out){
linop_base.HermOp(in, out);
axpy(out, shift, in, out);
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
HermOp(in,out);
ComplexD dot = innerProduct(in,out);
n1=real(dot);
n2=norm2(out);
}
};
};
template<class FieldD, class FieldF,
typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
class ConjugateGradientMultiShiftMixedPrec : public OperatorMultiFunction<FieldD>,
public OperatorFunction<FieldD>
{
public:
using OperatorFunction<FieldD>::operator();
RealD Tolerance;
Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
std::vector<int> IterationsToCompleteShift; // Iterations for this shift
int verbose;
MultiShiftFunction shifts;
std::vector<RealD> TrueResidualShift;
int ReliableUpdateFreq; //number of iterations between reliable updates
GridBase* SinglePrecGrid; //Grid for single-precision fields
LinearOperatorBase<FieldF> &Linop_f; //single precision
ConjugateGradientMultiShiftMixedPrec(Integer maxit, const MultiShiftFunction &_shifts,
GridBase* _SinglePrecGrid, LinearOperatorBase<FieldF> &_Linop_f,
int _ReliableUpdateFreq
) :
MaxIterations(maxit), shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq)
{
verbose=1;
IterationsToCompleteShift.resize(_shifts.order);
TrueResidualShift.resize(_shifts.order);
}
void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, FieldD &psi)
{
GridBase *grid = src.Grid();
int nshift = shifts.order;
std::vector<FieldD> results(nshift,grid);
(*this)(Linop,src,results,psi);
}
void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, std::vector<FieldD> &results, FieldD &psi)
{
int nshift = shifts.order;
(*this)(Linop,src,results);
psi = shifts.norm*src;
for(int i=0;i<nshift;i++){
psi = psi + shifts.residues[i]*results[i];
}
return;
}
void operator() (LinearOperatorBase<FieldD> &Linop_d, const FieldD &src_d, std::vector<FieldD> &psi_d)
{
GridBase *DoublePrecGrid = src_d.Grid();
////////////////////////////////////////////////////////////////////////
// Convenience references to the info stored in "MultiShiftFunction"
////////////////////////////////////////////////////////////////////////
int nshift = shifts.order;
std::vector<RealD> &mass(shifts.poles); // Make references to array in "shifts"
std::vector<RealD> &mresidual(shifts.tolerances);
std::vector<RealD> alpha(nshift,1.0);
//Double precision search directions
FieldD p_d(DoublePrecGrid);
std::vector<FieldD> ps_d(nshift, DoublePrecGrid);// Search directions (double precision)
FieldD tmp_d(DoublePrecGrid);
FieldD r_d(DoublePrecGrid);
FieldD mmp_d(DoublePrecGrid);
assert(psi_d.size()==nshift);
assert(mass.size()==nshift);
assert(mresidual.size()==nshift);
// dynamic sized arrays on stack; 2d is a pain with vector
RealD bs[nshift];
RealD rsq[nshift];
RealD z[nshift][2];
int converged[nshift];
const int primary =0;
//Primary shift fields CG iteration
RealD a,b,c,d;
RealD cp,bp,qq; //prev
// Matrix mult fields
FieldF r_f(SinglePrecGrid);
FieldF p_f(SinglePrecGrid);
FieldF tmp_f(SinglePrecGrid);
FieldF mmp_f(SinglePrecGrid);
FieldF src_f(SinglePrecGrid);
precisionChange(src_f, src_d);
// Check lightest mass
for(int s=0;s<nshift;s++){
assert( mass[s]>= mass[primary] );
converged[s]=0;
}
// Wire guess to zero
// Residuals "r" are src
// First search direction "p" is also src
cp = norm2(src_d);
// Handle trivial case of zero src.
if( cp == 0. ){
for(int s=0;s<nshift;s++){
psi_d[s] = Zero();
IterationsToCompleteShift[s] = 1;
TrueResidualShift[s] = 0.;
}
return;
}
for(int s=0;s<nshift;s++){
rsq[s] = cp * mresidual[s] * mresidual[s];
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: shift "<< s <<" target resid "<<rsq[s]<<std::endl;
ps_d[s] = src_d;
}
// r and p for primary
r_f=src_f; //residual maintained in single
p_f=src_f;
p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys
//MdagM+m[0]
Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
axpy(mmp_f,mass[0],p_f,mmp_f);
RealD rn = norm2(p_f);
d += rn*mass[0];
b = -cp /d;
// Set up the various shift variables
int iz=0;
z[0][1-iz] = 1.0;
z[0][iz] = 1.0;
bs[0] = b;
for(int s=1;s<nshift;s++){
z[s][1-iz] = 1.0;
z[s][iz] = 1.0/( 1.0 - b*(mass[s]-mass[0]));
bs[s] = b*z[s][iz];
}
// r += b[0] A.p[0]
// c= norm(r)
c=axpy_norm(r_f,b,mmp_f,r_f);
for(int s=0;s<nshift;s++) {
axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d);
}
///////////////////////////////////////
// Timers
///////////////////////////////////////
GridStopWatch AXPYTimer, ShiftTimer, QRTimer, MatrixTimer, SolverTimer, PrecChangeTimer, CleanupTimer;
SolverTimer.Start();
// Iteration loop
int k;
for (k=1;k<=MaxIterations;k++){
a = c /cp;
//Update double precision search direction by residual
PrecChangeTimer.Start();
precisionChange(r_d, r_f);
PrecChangeTimer.Stop();
AXPYTimer.Start();
axpy(p_d,a,p_d,r_d);
for(int s=0;s<nshift;s++){
if ( ! converged[s] ) {
if (s==0){
axpy(ps_d[s],a,ps_d[s],r_d);
} else{
RealD as =a *z[s][iz]*bs[s] /(z[s][1-iz]*b);
axpby(ps_d[s],z[s][iz],as,r_d,ps_d[s]);
}
}
}
AXPYTimer.Stop();
PrecChangeTimer.Start();
precisionChange(p_f, p_d); //get back single prec search direction for linop
PrecChangeTimer.Stop();
cp=c;
MatrixTimer.Start();
Linop_f.HermOp(p_f,mmp_f);
d=real(innerProduct(p_f,mmp_f));
MatrixTimer.Stop();
AXPYTimer.Start();
axpy(mmp_f,mass[0],p_f,mmp_f);
AXPYTimer.Stop();
RealD rn = norm2(p_f);
d += rn*mass[0];
bp=b;
b=-cp/d;
// Toggle the recurrence history
bs[0] = b;
iz = 1-iz;
ShiftTimer.Start();
for(int s=1;s<nshift;s++){
if((!converged[s])){
RealD z0 = z[s][1-iz];
RealD z1 = z[s][iz];
z[s][iz] = z0*z1*bp
/ (b*a*(z1-z0) + z1*bp*(1- (mass[s]-mass[0])*b));
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
}
}
ShiftTimer.Stop();
//Update double precision solutions
AXPYTimer.Start();
for(int s=0;s<nshift;s++){
int ss = s;
if( (!converged[s]) ) {
axpy(psi_d[ss],-bs[s]*alpha[s],ps_d[s],psi_d[ss]);
}
}
//Perform reliable update if necessary; otherwise update residual from single-prec mmp
RealD c_f = axpy_norm(r_f,b,mmp_f,r_f);
AXPYTimer.Stop();
c = c_f;
if(k % ReliableUpdateFreq == 0){
//Replace r with true residual
MatrixTimer.Start();
Linop_d.HermOp(psi_d[0],mmp_d);
MatrixTimer.Stop();
AXPYTimer.Start();
axpy(mmp_d,mass[0],psi_d[0],mmp_d);
RealD c_d = axpy_norm(r_d, -1.0, mmp_d, src_d);
AXPYTimer.Stop();
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec k="<<k<< ", replaced |r|^2 = "<<c_f <<" with |r|^2 = "<<c_d<<std::endl;
PrecChangeTimer.Start();
precisionChange(r_f, r_d);
PrecChangeTimer.Stop();
c = c_d;
}
// Convergence checks
int all_converged = 1;
for(int s=0;s<nshift;s++){
if ( (!converged[s]) ){
IterationsToCompleteShift[s] = k;
RealD css = c * z[s][iz]* z[s][iz];
if(css<rsq[s]){
if ( ! converged[s] )
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
converged[s]=1;
} else {
all_converged=0;
}
}
}
if ( all_converged ){
SolverTimer.Stop();
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: All shifts have converged iteration "<<k<<std::endl;
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: Checking solutions"<<std::endl;
// Check answers
for(int s=0; s < nshift; s++) {
Linop_d.HermOpAndNorm(psi_d[s],mmp_d,d,qq);
axpy(tmp_d,mass[s],psi_d[s],mmp_d);
axpy(r_d,-alpha[s],src_d,tmp_d);
RealD rn = norm2(r_d);
RealD cn = norm2(src_d);
TrueResidualShift[s] = std::sqrt(rn/cn);
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: shift["<<s<<"] true residual "<< TrueResidualShift[s] << " target " << mresidual[s] << std::endl;
//If we have not reached the desired tolerance, do a (mixed precision) CG cleanup
if(rn >= rsq[s]){
CleanupTimer.Start();
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: performing cleanup step for shift " << s << std::endl;
//Setup linear operators for final cleanup
ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldD> Linop_shift_d(Linop_d, mass[s]);
ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldF> Linop_shift_f(Linop_f, mass[s]);
MixedPrecisionConjugateGradient<FieldD,FieldF> cg(mresidual[s], MaxIterations, MaxIterations, SinglePrecGrid, Linop_shift_f, Linop_shift_d);
cg(src_d, psi_d[s]);
TrueResidualShift[s] = cg.TrueResidual;
CleanupTimer.Stop();
}
}
std::cout << GridLogMessage << "ConjugateGradientMultiShiftMixedPrec: Time Breakdown for body"<<std::endl;
std::cout << GridLogMessage << "\tSolver " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\t\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\t\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\t\tShift " << ShiftTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\t\tPrecision Change " << PrecChangeTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tFinal Cleanup " << CleanupTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tSolver+Cleanup " << SolverTimer.Elapsed() + CleanupTimer.Elapsed() << std::endl;
IterationsToComplete = k;
return;
}
}
// ugly hack
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
// assert(0);
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -44,7 +44,6 @@ public:
int, MinRes); // Must restart
};
//This class is the input parameter class for some testing programs
struct LocalCoherenceLanczosParams : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
@ -146,24 +145,16 @@ public:
LinearOperatorBase<FineField> &_Linop;
RealD _coarse_relax_tol;
std::vector<FineField> &_subspace;
int _largestEvalIdxForReport; //The convergence of the LCL is based on the evals of the coarse grid operator, not those of the underlying fine grid operator
//As a result we do not know what the eval range of the fine operator is until the very end, making tuning the Cheby bounds very difficult
//To work around this issue, every restart we separately reconstruct the fine operator eval for the lowest and highest evec and print these
//out alongside the evals of the coarse operator. To do so we need to know the index of the largest eval (i.e. Nstop-1)
//NOTE: If largestEvalIdxForReport=-1 (default) then this is not performed
ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField> &Poly,
OperatorFunction<FineField> &smoother,
LinearOperatorBase<FineField> &Linop,
std::vector<FineField> &subspace,
RealD coarse_relax_tol=5.0e3,
int largestEvalIdxForReport=-1)
RealD coarse_relax_tol=5.0e3)
: _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace),
_coarse_relax_tol(coarse_relax_tol), _largestEvalIdxForReport(largestEvalIdxForReport)
_coarse_relax_tol(coarse_relax_tol)
{ };
//evalMaxApprox: approximation of largest eval of the fine Chebyshev operator (suitably wrapped by block projection)
int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
{
CoarseField v(B);
@ -186,26 +177,12 @@ public:
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
<<std::endl;
if(_largestEvalIdxForReport != -1 && (j==0 || j==_largestEvalIdxForReport)){
std::cout<<GridLogIRL << "Estimating true eval of fine grid operator for eval idx " << j << std::endl;
RealD tmp_eval;
ReconstructEval(j,eresid,B,tmp_eval,1.0); //don't use evalMaxApprox of coarse operator! (cf below)
}
int conv=0;
if( (vv<eresid*eresid) ) conv = 1;
return conv;
}
//This function is called at the end of the coarse grid Lanczos. It promotes the coarse eigenvector 'B' to the fine grid,
//applies a smoother to the result then computes the computes the *fine grid* eigenvalue (output as 'eval').
//evalMaxApprox should be the approximation of the largest eval of the fine Hermop. However when this function is called by IRL it actually passes the largest eval of the *Chebyshev* operator (as this is the max approx used for the TestConvergence above)
//As the largest eval of the Chebyshev is typically several orders of magnitude larger this makes the convergence test pass even when it should not.
//We therefore ignore evalMaxApprox here and use a value of 1.0 (note this value is already used by TestCoarse)
int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
{
evalMaxApprox = 1.0; //cf above
GridBase *FineGrid = _subspace[0].Grid();
int checkerboard = _subspace[0].Checkerboard();
FineField fB(FineGrid);fB.Checkerboard() =checkerboard;
@ -224,13 +201,13 @@ public:
eval = vnum/vden;
fv -= eval*fB;
RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0);
if ( j > nbasis ) eresid = eresid*_coarse_relax_tol;
std::cout.precision(13);
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv << " target " << eresid*eresid
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
<<std::endl;
if ( j > nbasis ) eresid = eresid*_coarse_relax_tol;
if( (vv<eresid*eresid) ) return 1;
return 0;
}
@ -308,10 +285,6 @@ public:
evals_coarse.resize(0);
};
//The block inner product is the inner product on the fine grid locally summed over the blocks
//to give a Lattice<Scalar> on the coarse grid. This function orthnormalizes the fine-grid subspace
//vectors under the block inner product. This step must be performed after computing the fine grid
//eigenvectors and before computing the coarse grid eigenvectors.
void Orthogonalise(void ) {
CoarseScalar InnerProd(_CoarseGrid);
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
@ -355,8 +328,6 @@ public:
}
}
//While this method serves to check the coarse eigenvectors, it also recomputes the eigenvalues from the smoothed reconstructed eigenvectors
//hence the smoother can be tuned after running the coarse Lanczos by using a different smoother here
void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)
{
assert(evals_fine.size() == nbasis);
@ -405,31 +376,25 @@ public:
evals_fine.resize(nbasis);
subspace.resize(nbasis,_FineGrid);
}
//cheby_op: Parameters of the fine grid Chebyshev polynomial used for the Lanczos acceleration
//cheby_smooth: Parameters of a separate Chebyshev polynomial used after the Lanczos has completed to smooth out high frequency noise in the reconstructed fine grid eigenvectors prior to computing the eigenvalue
//relax: Reconstructed eigenvectors (post smoothing) are naturally not as precise as true eigenvectors. This factor acts as a multiplier on the stopping condition when determining whether the results satisfy the user provided stopping condition
void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
int Nstop, int Nk, int Nm,RealD resid,
RealD MaxIt, RealD betastp, int MinRes)
{
Chebyshev<FineField> Cheby(cheby_op); //Chebyshev of fine operator on fine grid
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,subspace); //Fine operator on coarse grid with intermediate fine grid conversion
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace); //Chebyshev of fine operator on coarse grid with intermediate fine grid conversion
Chebyshev<FineField> Cheby(cheby_op);
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,subspace);
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace);
//////////////////////////////////////////////////////////////////////////////////////////////////
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
//////////////////////////////////////////////////////////////////////////////////////////////////
Chebyshev<FineField> ChebySmooth(cheby_smooth); //lower order Chebyshev of fine operator on fine grid used to smooth regenerated eigenvectors
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax,Nstop-1);
Chebyshev<FineField> ChebySmooth(cheby_smooth);
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
evals_coarse.resize(Nm);
evec_coarse.resize(Nm,_CoarseGrid);
CoarseField src(_CoarseGrid); src=1.0;
//Note the "tester" here is also responsible for generating the fine grid eigenvalues which are output into the "evals_coarse" array
ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
int Nconv=0;
IRL.calc(evals_coarse,evec_coarse,src,Nconv,false);
@ -440,14 +405,6 @@ public:
std::cout << i << " Coarse eval = " << evals_coarse[i] << std::endl;
}
}
//Get the fine eigenvector 'i' by reconstruction
void getFineEvecEval(FineField &evec, RealD &eval, const int i) const{
blockPromote(evec_coarse[i],evec,subspace);
eval = evals_coarse[i];
}
};
NAMESPACE_END(Grid);

View File

@ -29,8 +29,6 @@ template<class Field> class PowerMethod
RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
RealD vden = norm2(src_n);
RealD na = vnum/vden;
std::cout << GridLogIterative << "PowerMethod: Current approximation of largest eigenvalue " << na << std::endl;
if ( (fabs(evalMaxApprox/na - 1.0) < 0.001) || (i==_MAX_ITER_EST_-1) ) {
evalMaxApprox = na;

View File

@ -40,7 +40,7 @@ void MemoryManager::PrintBytes(void)
//////////////////////////////////////////////////////////////////////
MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax];
int MemoryManager::Victim[MemoryManager::NallocType];
int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 8, 2, 8, 2, 8 };
int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 8, 8, 16, 8, 16 };
uint64_t MemoryManager::CacheBytes[MemoryManager::NallocType];
//////////////////////////////////////////////////////////////////////
// Actual allocation and deallocation utils

View File

@ -36,6 +36,11 @@ NAMESPACE_BEGIN(Grid);
#define GRID_ALLOC_SMALL_LIMIT (4096)
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
#define FILE_LINE __FILE__ ":" TOSTRING(__LINE__)
#define AUDIT(a) MemoryManager::Audit(FILE_LINE)
/*Pinning pages is costly*/
////////////////////////////////////////////////////////////////////////////
// Advise the LatticeAccelerator class
@ -92,8 +97,9 @@ private:
static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim,uint64_t &cbytes) ;
static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache,uint64_t &cbytes) ;
static void PrintBytes(void);
public:
static void PrintBytes(void);
static void Audit(std::string s);
static void Init(void);
static void InitMessage(void);
static void *AcceleratorAllocate(size_t bytes);
@ -113,6 +119,8 @@ private:
static uint64_t DeviceToHostBytes;
static uint64_t HostToDeviceXfer;
static uint64_t DeviceToHostXfer;
static uint64_t DeviceEvictions;
static uint64_t DeviceDestroy;
private:
#ifndef GRID_UVM
@ -170,6 +178,7 @@ private:
public:
static void Print(void);
static void PrintAll(void);
static void PrintState( void* CpuPtr);
static int isOpen (void* CpuPtr);
static void ViewClose(void* CpuPtr,ViewMode mode);

View File

@ -3,8 +3,13 @@
#warning "Using explicit device memory copies"
NAMESPACE_BEGIN(Grid);
//#define dprintf(...) printf ( __VA_ARGS__ ); fflush(stdout);
#define dprintf(...)
#define MAXLINE 512
static char print_buffer [ MAXLINE ];
#define mprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogMemory << print_buffer;
#define dprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogMemory << print_buffer;
//#define dprintf(...)
////////////////////////////////////////////////////////////
@ -23,6 +28,8 @@ uint64_t MemoryManager::HostToDeviceBytes;
uint64_t MemoryManager::DeviceToHostBytes;
uint64_t MemoryManager::HostToDeviceXfer;
uint64_t MemoryManager::DeviceToHostXfer;
uint64_t MemoryManager::DeviceEvictions;
uint64_t MemoryManager::DeviceDestroy;
////////////////////////////////////
// Priority ordering for unlocked entries
@ -104,15 +111,17 @@ void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
///////////////////////////////////////////////////////////
assert(AccCache.state!=Empty);
dprintf("MemoryManager: Discard(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
mprintf("MemoryManager: Discard(%lx) %lx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
assert(AccCache.accLock==0);
assert(AccCache.cpuLock==0);
assert(AccCache.CpuPtr!=(uint64_t)NULL);
if(AccCache.AccPtr) {
AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
DeviceDestroy++;
DeviceBytes -=AccCache.bytes;
LRUremove(AccCache);
dprintf("MemoryManager: Free(%llx) LRU %lld Total %lld\n",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes);
AccCache.AccPtr=(uint64_t) NULL;
dprintf("MemoryManager: Free(%lx) LRU %ld Total %ld\n",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes);
}
uint64_t CpuPtr = AccCache.CpuPtr;
EntryErase(CpuPtr);
@ -121,26 +130,36 @@ void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
void MemoryManager::Evict(AcceleratorViewEntry &AccCache)
{
///////////////////////////////////////////////////////////////////////////
// Make CPU consistent, remove from Accelerator, remove entry
// Cannot be locked. If allocated must be in LRU pool.
// Make CPU consistent, remove from Accelerator, remove from LRU, LEAVE CPU only entry
// Cannot be acclocked. If allocated must be in LRU pool.
//
// Nov 2022... Felix issue: Allocating two CpuPtrs, can have an entry in LRU-q with CPUlock.
// and require to evict the AccPtr copy. Eviction was a mistake in CpuViewOpen
// but there is a weakness where CpuLock entries are attempted for erase
// Take these OUT LRU queue when CPU locked?
// Cannot take out the table as cpuLock data is important.
///////////////////////////////////////////////////////////////////////////
assert(AccCache.state!=Empty);
dprintf("MemoryManager: Evict(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
assert(AccCache.accLock==0);
assert(AccCache.cpuLock==0);
mprintf("MemoryManager: Evict cpu %lx acc %lx cpuLock %ld accLock %ld\n",
(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr,
(uint64_t)AccCache.cpuLock,(uint64_t)AccCache.accLock);
assert(AccCache.accLock==0); // Cannot evict so logic bomb
assert(AccCache.CpuPtr!=(uint64_t)NULL);
if(AccCache.state==AccDirty) {
Flush(AccCache);
}
assert(AccCache.CpuPtr!=(uint64_t)NULL);
if(AccCache.AccPtr) {
AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
DeviceBytes -=AccCache.bytes;
LRUremove(AccCache);
dprintf("MemoryManager: Free(%llx) footprint now %lld \n",(uint64_t)AccCache.AccPtr,DeviceBytes);
AccCache.AccPtr=(uint64_t)NULL;
AccCache.state=CpuDirty; // CPU primary now
DeviceBytes -=AccCache.bytes;
dprintf("MemoryManager: Free(%lx) footprint now %ld \n",(uint64_t)AccCache.AccPtr,DeviceBytes);
}
uint64_t CpuPtr = AccCache.CpuPtr;
EntryErase(CpuPtr);
// uint64_t CpuPtr = AccCache.CpuPtr;
DeviceEvictions++;
// EntryErase(CpuPtr);
}
void MemoryManager::Flush(AcceleratorViewEntry &AccCache)
{
@ -150,7 +169,7 @@ void MemoryManager::Flush(AcceleratorViewEntry &AccCache)
assert(AccCache.AccPtr!=(uint64_t)NULL);
assert(AccCache.CpuPtr!=(uint64_t)NULL);
acceleratorCopyFromDevice((void *)AccCache.AccPtr,(void *)AccCache.CpuPtr,AccCache.bytes);
dprintf("MemoryManager: Flush %llx -> %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
mprintf("MemoryManager: Flush %lx -> %lx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
DeviceToHostBytes+=AccCache.bytes;
DeviceToHostXfer++;
AccCache.state=Consistent;
@ -165,7 +184,7 @@ void MemoryManager::Clone(AcceleratorViewEntry &AccCache)
AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes);
DeviceBytes+=AccCache.bytes;
}
dprintf("MemoryManager: Clone %llx <- %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
mprintf("MemoryManager: Clone %lx <- %lx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
acceleratorCopyToDevice((void *)AccCache.CpuPtr,(void *)AccCache.AccPtr,AccCache.bytes);
HostToDeviceBytes+=AccCache.bytes;
HostToDeviceXfer++;
@ -191,6 +210,7 @@ void MemoryManager::CpuDiscard(AcceleratorViewEntry &AccCache)
void MemoryManager::ViewClose(void* Ptr,ViewMode mode)
{
if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){
dprintf("AcceleratorViewClose %lx\n",(uint64_t)Ptr);
AcceleratorViewClose((uint64_t)Ptr);
} else if( (mode==CpuRead)||(mode==CpuWrite)){
CpuViewClose((uint64_t)Ptr);
@ -202,6 +222,7 @@ void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvis
{
uint64_t CpuPtr = (uint64_t)_CpuPtr;
if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){
dprintf("AcceleratorViewOpen %lx\n",(uint64_t)CpuPtr);
return (void *) AcceleratorViewOpen(CpuPtr,bytes,mode,hint);
} else if( (mode==CpuRead)||(mode==CpuWrite)){
return (void *)CpuViewOpen(CpuPtr,bytes,mode,hint);
@ -212,13 +233,16 @@ void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvis
}
void MemoryManager::EvictVictims(uint64_t bytes)
{
assert(bytes<DeviceMaxBytes);
while(bytes+DeviceLRUBytes > DeviceMaxBytes){
if ( DeviceLRUBytes > 0){
assert(LRU.size()>0);
uint64_t victim = LRU.back();
uint64_t victim = LRU.back(); // From the LRU
auto AccCacheIterator = EntryLookup(victim);
auto & AccCache = AccCacheIterator->second;
Evict(AccCache);
} else {
return;
}
}
}
@ -241,11 +265,12 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
assert(AccCache.cpuLock==0); // Programming error
if(AccCache.state!=Empty) {
dprintf("ViewOpen found entry %llx %llx : %lld %lld\n",
dprintf("ViewOpen found entry %lx %lx : %ld %ld accLock %ld\n",
(uint64_t)AccCache.CpuPtr,
(uint64_t)CpuPtr,
(uint64_t)AccCache.bytes,
(uint64_t)bytes);
(uint64_t)bytes,
(uint64_t)AccCache.accLock);
assert(AccCache.CpuPtr == CpuPtr);
assert(AccCache.bytes ==bytes);
}
@ -280,6 +305,7 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
AccCache.state = Consistent; // Empty + AccRead => Consistent
}
AccCache.accLock= 1;
dprintf("Copied Empty entry into device accLock= %d\n",AccCache.accLock);
} else if(AccCache.state==CpuDirty ){
if(mode==AcceleratorWriteDiscard) {
CpuDiscard(AccCache);
@ -292,28 +318,30 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
AccCache.state = Consistent; // CpuDirty + AccRead => Consistent
}
AccCache.accLock++;
dprintf("Copied CpuDirty entry into device accLock %d\n",AccCache.accLock);
dprintf("CpuDirty entry into device ++accLock= %d\n",AccCache.accLock);
} else if(AccCache.state==Consistent) {
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
AccCache.state = AccDirty; // Consistent + AcceleratorWrite=> AccDirty
else
AccCache.state = Consistent; // Consistent + AccRead => Consistent
AccCache.accLock++;
dprintf("Consistent entry into device accLock %d\n",AccCache.accLock);
dprintf("Consistent entry into device ++accLock= %d\n",AccCache.accLock);
} else if(AccCache.state==AccDirty) {
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
AccCache.state = AccDirty; // AccDirty + AcceleratorWrite=> AccDirty
else
AccCache.state = AccDirty; // AccDirty + AccRead => AccDirty
AccCache.accLock++;
dprintf("AccDirty entry into device accLock %d\n",AccCache.accLock);
dprintf("AccDirty entry ++accLock= %d\n",AccCache.accLock);
} else {
assert(0);
}
// If view is opened on device remove from LRU
assert(AccCache.accLock>0);
// If view is opened on device must remove from LRU
if(AccCache.LRU_valid==1){
// must possibly remove from LRU as now locked on GPU
dprintf("AccCache entry removed from LRU \n");
LRUremove(AccCache);
}
@ -334,10 +362,12 @@ void MemoryManager::AcceleratorViewClose(uint64_t CpuPtr)
assert(AccCache.accLock>0);
AccCache.accLock--;
// Move to LRU queue if not locked and close on device
if(AccCache.accLock==0) {
dprintf("AccleratorViewClose %lx AccLock decremented to %ld move to LRU queue\n",(uint64_t)CpuPtr,(uint64_t)AccCache.accLock);
LRUinsert(AccCache);
} else {
dprintf("AccleratorViewClose %lx AccLock decremented to %ld\n",(uint64_t)CpuPtr,(uint64_t)AccCache.accLock);
}
}
void MemoryManager::CpuViewClose(uint64_t CpuPtr)
@ -374,9 +404,10 @@ uint64_t MemoryManager::CpuViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,V
auto AccCacheIterator = EntryLookup(CpuPtr);
auto & AccCache = AccCacheIterator->second;
if (!AccCache.AccPtr) {
EvictVictims(bytes);
}
// CPU doesn't need to free space
// if (!AccCache.AccPtr) {
// EvictVictims(bytes);
// }
assert((mode==CpuRead)||(mode==CpuWrite));
assert(AccCache.accLock==0); // Programming error
@ -430,20 +461,28 @@ void MemoryManager::NotifyDeletion(void *_ptr)
void MemoryManager::Print(void)
{
PrintBytes();
std::cout << GridLogDebug << "--------------------------------------------" << std::endl;
std::cout << GridLogDebug << "Memory Manager " << std::endl;
std::cout << GridLogDebug << "--------------------------------------------" << std::endl;
std::cout << GridLogDebug << DeviceBytes << " bytes allocated on device " << std::endl;
std::cout << GridLogDebug << DeviceLRUBytes<< " bytes evictable on device " << std::endl;
std::cout << GridLogDebug << DeviceMaxBytes<< " bytes max on device " << std::endl;
std::cout << GridLogDebug << HostToDeviceXfer << " transfers to device " << std::endl;
std::cout << GridLogDebug << DeviceToHostXfer << " transfers from device " << std::endl;
std::cout << GridLogDebug << HostToDeviceBytes<< " bytes transfered to device " << std::endl;
std::cout << GridLogDebug << DeviceToHostBytes<< " bytes transfered from device " << std::endl;
std::cout << GridLogDebug << AccViewTable.size()<< " vectors " << LRU.size()<<" evictable"<< std::endl;
std::cout << GridLogDebug << "--------------------------------------------" << std::endl;
std::cout << GridLogDebug << "CpuAddr\t\tAccAddr\t\tState\t\tcpuLock\taccLock\tLRU_valid "<<std::endl;
std::cout << GridLogDebug << "--------------------------------------------" << std::endl;
std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
std::cout << GridLogMessage << "Memory Manager " << std::endl;
std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
std::cout << GridLogMessage << DeviceBytes << " bytes allocated on device " << std::endl;
std::cout << GridLogMessage << DeviceLRUBytes<< " bytes evictable on device " << std::endl;
std::cout << GridLogMessage << DeviceMaxBytes<< " bytes max on device " << std::endl;
std::cout << GridLogMessage << HostToDeviceXfer << " transfers to device " << std::endl;
std::cout << GridLogMessage << DeviceToHostXfer << " transfers from device " << std::endl;
std::cout << GridLogMessage << HostToDeviceBytes<< " bytes transfered to device " << std::endl;
std::cout << GridLogMessage << DeviceToHostBytes<< " bytes transfered from device " << std::endl;
std::cout << GridLogMessage << DeviceEvictions << " Evictions from device " << std::endl;
std::cout << GridLogMessage << DeviceDestroy << " Destroyed vectors on device " << std::endl;
std::cout << GridLogMessage << AccViewTable.size()<< " vectors " << LRU.size()<<" evictable"<< std::endl;
std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
}
void MemoryManager::PrintAll(void)
{
Print();
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
std::cout << GridLogMessage << "CpuAddr\t\tAccAddr\t\tState\t\tcpuLock\taccLock\tLRU_valid "<<std::endl;
std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
for(auto it=AccViewTable.begin();it!=AccViewTable.end();it++){
auto &AccCache = it->second;
@ -453,13 +492,13 @@ void MemoryManager::Print(void)
if ( AccCache.state==AccDirty ) str = std::string("AccDirty");
if ( AccCache.state==Consistent)str = std::string("Consistent");
std::cout << GridLogDebug << "0x"<<std::hex<<AccCache.CpuPtr<<std::dec
std::cout << GridLogMessage << "0x"<<std::hex<<AccCache.CpuPtr<<std::dec
<< "\t0x"<<std::hex<<AccCache.AccPtr<<std::dec<<"\t" <<str
<< "\t" << AccCache.cpuLock
<< "\t" << AccCache.accLock
<< "\t" << AccCache.LRU_valid<<std::endl;
}
std::cout << GridLogDebug << "--------------------------------------------" << std::endl;
std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
};
int MemoryManager::isOpen (void* _CpuPtr)
@ -473,6 +512,61 @@ int MemoryManager::isOpen (void* _CpuPtr)
return 0;
}
}
void MemoryManager::Audit(std::string s)
{
uint64_t CpuBytes=0;
uint64_t AccBytes=0;
uint64_t LruBytes1=0;
uint64_t LruBytes2=0;
uint64_t LruCnt=0;
uint64_t LockedBytes=0;
std::cout << " Memory Manager::Audit() from "<<s<<std::endl;
for(auto it=LRU.begin();it!=LRU.end();it++){
uint64_t cpuPtr = *it;
assert(EntryPresent(cpuPtr));
auto AccCacheIterator = EntryLookup(cpuPtr);
auto & AccCache = AccCacheIterator->second;
LruBytes2+=AccCache.bytes;
assert(AccCache.LRU_valid==1);
assert(AccCache.LRU_entry==it);
}
std::cout << " Memory Manager::Audit() LRU queue matches table entries "<<std::endl;
for(auto it=AccViewTable.begin();it!=AccViewTable.end();it++){
auto &AccCache = it->second;
std::string str;
if ( AccCache.state==Empty ) str = std::string("Empty");
if ( AccCache.state==CpuDirty ) str = std::string("CpuDirty");
if ( AccCache.state==AccDirty ) str = std::string("AccDirty");
if ( AccCache.state==Consistent)str = std::string("Consistent");
CpuBytes+=AccCache.bytes;
if( AccCache.AccPtr ) AccBytes+=AccCache.bytes;
if( AccCache.LRU_valid ) LruBytes1+=AccCache.bytes;
if( AccCache.LRU_valid ) LruCnt++;
if ( AccCache.cpuLock || AccCache.accLock ) {
assert(AccCache.LRU_valid==0);
std::cout << GridLogError << s<< "\n\t 0x"<<std::hex<<AccCache.CpuPtr<<std::dec
<< "\t0x"<<std::hex<<AccCache.AccPtr<<std::dec<<"\t" <<str
<< "\t cpuLock " << AccCache.cpuLock
<< "\t accLock " << AccCache.accLock
<< "\t LRUvalid " << AccCache.LRU_valid<<std::endl;
}
assert( AccCache.cpuLock== 0 ) ;
assert( AccCache.accLock== 0 ) ;
}
std::cout << " Memory Manager::Audit() no locked table entries "<<std::endl;
assert(LruBytes1==LruBytes2);
assert(LruBytes1==DeviceLRUBytes);
std::cout << " Memory Manager::Audit() evictable bytes matches sum over table "<<std::endl;
assert(AccBytes==DeviceBytes);
std::cout << " Memory Manager::Audit() device bytes matches sum over table "<<std::endl;
assert(LruCnt == LRU.size());
std::cout << " Memory Manager::Audit() LRU entry count matches "<<std::endl;
}
void MemoryManager::PrintState(void* _CpuPtr)
{
@ -489,8 +583,8 @@ void MemoryManager::PrintState(void* _CpuPtr)
if ( AccCache.state==EvictNext) str = std::string("EvictNext");
std::cout << GridLogMessage << "CpuAddr\t\tAccAddr\t\tState\t\tcpuLock\taccLock\tLRU_valid "<<std::endl;
std::cout << GridLogMessage << "0x"<<std::hex<<AccCache.CpuPtr<<std::dec
<< "\t0x"<<std::hex<<AccCache.AccPtr<<std::dec<<"\t" <<str
std::cout << GridLogMessage << "\tx"<<std::hex<<AccCache.CpuPtr<<std::dec
<< "\tx"<<std::hex<<AccCache.AccPtr<<std::dec<<"\t" <<str
<< "\t" << AccCache.cpuLock
<< "\t" << AccCache.accLock
<< "\t" << AccCache.LRU_valid<<std::endl;

View File

@ -12,7 +12,10 @@ uint64_t MemoryManager::HostToDeviceBytes;
uint64_t MemoryManager::DeviceToHostBytes;
uint64_t MemoryManager::HostToDeviceXfer;
uint64_t MemoryManager::DeviceToHostXfer;
uint64_t MemoryManager::DeviceEvictions;
uint64_t MemoryManager::DeviceDestroy;
void MemoryManager::Audit(std::string s){};
void MemoryManager::ViewClose(void* AccPtr,ViewMode mode){};
void *MemoryManager::ViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint){ return CpuPtr; };
int MemoryManager::isOpen (void* CpuPtr) { return 0;}
@ -21,6 +24,7 @@ void MemoryManager::PrintState(void* CpuPtr)
std::cout << GridLogMessage << "Host<->Device memory movement not currently managed by Grid." << std::endl;
};
void MemoryManager::Print(void){};
void MemoryManager::PrintAll(void){};
void MemoryManager::NotifyDeletion(void *ptr){};
NAMESPACE_END(Grid);

View File

@ -53,11 +53,10 @@ public:
// Communicator should know nothing of the physics grid, only processor grid.
////////////////////////////////////////////
int _Nprocessors; // How many in all
int _processor; // linear processor rank
unsigned long _ndimension;
Coordinate _shm_processors; // Which dimensions get relayed out over processors lanes.
Coordinate _processors; // Which dimensions get relayed out over processors lanes.
int _processor; // linear processor rank
Coordinate _processor_coor; // linear processor coordinate
unsigned long _ndimension;
static Grid_MPI_Comm communicator_world;
Grid_MPI_Comm communicator;
std::vector<Grid_MPI_Comm> communicator_halo;
@ -98,9 +97,8 @@ public:
int BossRank(void) ;
int ThisRank(void) ;
const Coordinate & ThisProcessorCoor(void) ;
const Coordinate & ShmGrid(void) { return _shm_processors; } ;
const Coordinate & ProcessorGrid(void) ;
int ProcessorCount(void) ;
int ProcessorCount(void) ;
////////////////////////////////////////////////////////////////////////////////
// very VERY rarely (Log, serial RNG) we need world without a grid
@ -144,16 +142,16 @@ public:
int bytes);
double StencilSendToRecvFrom(void *xmit,
int xmit_to_rank,int do_xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,int do_recv,
int recv_from_rank,
int bytes,int dir);
double StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,int do_xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,int do_recv,
int recv_from_rank,
int bytes,int dir);

View File

@ -106,7 +106,7 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors)
// Remap using the shared memory optimising routine
// The remap creates a comm which must be freed
////////////////////////////////////////////////////
GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm,_shm_processors);
GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm);
InitFromMPICommunicator(processors,optimal_comm);
SetCommunicator(optimal_comm);
///////////////////////////////////////////////////
@ -124,13 +124,12 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const
int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
Coordinate parent_processor_coor(_ndimension,0);
Coordinate parent_processors (_ndimension,1);
Coordinate shm_processors (_ndimension,1);
// Can make 5d grid from 4d etc...
int pad = _ndimension-parent_ndimension;
for(int d=0;d<parent_ndimension;d++){
parent_processor_coor[pad+d]=parent._processor_coor[d];
parent_processors [pad+d]=parent._processors[d];
shm_processors [pad+d]=parent._shm_processors[d];
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
@ -155,7 +154,6 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const
ccoor[d] = parent_processor_coor[d] % processors[d];
scoor[d] = parent_processor_coor[d] / processors[d];
ssize[d] = parent_processors[d] / processors[d];
if ( processors[d] < shm_processors[d] ) shm_processors[d] = processors[d]; // subnode splitting.
}
// rank within subcomm ; srank is rank of subcomm within blocks of subcomms
@ -337,22 +335,22 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
}
// Basic Halo comms primitive
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
int dest, int dox,
int dest,
void *recv,
int from, int dor,
int from,
int bytes,int dir)
{
std::vector<CommsRequest_t> list;
double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,dox,recv,from,dor,bytes,dir);
double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir);
StencilSendToRecvFromComplete(list,dir);
return offbytes;
}
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,int dox,
int dest,
void *recv,
int from,int dor,
int from,
int bytes,int dir)
{
int ncomm =communicator_halo.size();
@ -372,35 +370,32 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
double off_node_bytes=0.0;
int tag;
if ( dor ) {
if ( (gfrom ==MPI_UNDEFINED) || Stencil_force_mpi ) {
tag= dir+from*32;
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq);
assert(ierr==0);
list.push_back(rrq);
off_node_bytes+=bytes;
}
if ( (gfrom ==MPI_UNDEFINED) || Stencil_force_mpi ) {
tag= dir+from*32;
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq);
assert(ierr==0);
list.push_back(rrq);
off_node_bytes+=bytes;
}
if (dox) {
if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) {
tag= dir+_processor*32;
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq);
assert(ierr==0);
list.push_back(xrq);
off_node_bytes+=bytes;
} else {
void *shm = (void *) this->ShmBufferTranslate(dest,recv);
assert(shm!=NULL);
acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes);
}
}
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
this->StencilSendToRecvFromComplete(list,dir);
list.resize(0);
if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) {
tag= dir+_processor*32;
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq);
assert(ierr==0);
list.push_back(xrq);
off_node_bytes+=bytes;
} else {
// TODO : make a OMP loop on CPU, call threaded bcopy
void *shm = (void *) this->ShmBufferTranslate(dest,recv);
assert(shm!=NULL);
// std::cout <<"acceleratorCopyDeviceToDeviceAsynch"<< std::endl;
acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes);
}
// if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
// this->StencilSendToRecvFromComplete(list,dir);
// }
return off_node_bytes;
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list,int dir)

View File

@ -45,14 +45,12 @@ void CartesianCommunicator::Init(int *argc, char *** arv)
CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const CartesianCommunicator &parent,int &srank)
: CartesianCommunicator(processors)
{
_shm_processors = Coordinate(processors.size(),1);
srank=0;
SetCommunicator(communicator_world);
}
CartesianCommunicator::CartesianCommunicator(const Coordinate &processors)
{
_shm_processors = Coordinate(processors.size(),1);
_processors = processors;
_ndimension = processors.size(); assert(_ndimension>=1);
_processor_coor.resize(_ndimension);
@ -113,18 +111,18 @@ void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest
}
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
int xmit_to_rank,int dox,
int xmit_to_rank,
void *recv,
int recv_from_rank,int dor,
int recv_from_rank,
int bytes, int dir)
{
return 2.0*bytes;
}
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,int dox,
int xmit_to_rank,
void *recv,
int recv_from_rank,int dor,
int recv_from_rank,
int bytes, int dir)
{
return 2.0*bytes;

View File

@ -93,10 +93,9 @@ public:
// Create an optimal reordered communicator that makes MPI_Cart_create get it right
//////////////////////////////////////////////////////////////////////////////////////
static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD
// Turns MPI_COMM_WORLD into right layout for Cartesian
static void OptimalCommunicator (const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &ShmDims);
static void OptimalCommunicatorHypercube (const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &ShmDims);
static void OptimalCommunicatorSharedMemory(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &ShmDims);
static void OptimalCommunicator (const Coordinate &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian
static void OptimalCommunicatorHypercube (const Coordinate &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian
static void OptimalCommunicatorSharedMemory(const Coordinate &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian
static void GetShmDims(const Coordinate &WorldDims,Coordinate &ShmDims);
///////////////////////////////////////////////////
// Provide shared memory facilities off comm world

View File

@ -152,7 +152,7 @@ int Log2Size(int TwoToPower,int MAXLOG2)
}
return log2size;
}
void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &SHM)
void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_MPI_Comm & optimal_comm)
{
//////////////////////////////////////////////////////////////////////////////
// Look and see if it looks like an HPE 8600 based on hostname conventions
@ -165,8 +165,8 @@ void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_M
gethostname(name,namelen);
int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
if(nscan==3 && HPEhypercube ) OptimalCommunicatorHypercube(processors,optimal_comm,SHM);
else OptimalCommunicatorSharedMemory(processors,optimal_comm,SHM);
if(nscan==3 && HPEhypercube ) OptimalCommunicatorHypercube(processors,optimal_comm);
else OptimalCommunicatorSharedMemory(processors,optimal_comm);
}
static inline int divides(int a,int b)
{
@ -221,7 +221,7 @@ void GlobalSharedMemory::GetShmDims(const Coordinate &WorldDims,Coordinate &ShmD
dim=(dim+1) %ndimension;
}
}
void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &SHM)
void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processors,Grid_MPI_Comm & optimal_comm)
{
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
@ -294,8 +294,7 @@ void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processo
Coordinate HyperCoor(ndimension);
GetShmDims(WorldDims,ShmDims);
SHM = ShmDims;
////////////////////////////////////////////////////////////////
// Establish torus of processes and nodes with sub-blockings
////////////////////////////////////////////////////////////////
@ -342,7 +341,7 @@ void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processo
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
assert(ierr==0);
}
void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &SHM)
void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const Coordinate &processors,Grid_MPI_Comm & optimal_comm)
{
////////////////////////////////////////////////////////////////
// Identify subblock of ranks on node spreading across dims
@ -354,8 +353,6 @@ void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const Coordinate &proce
Coordinate ShmCoor(ndimension); Coordinate NodeCoor(ndimension); Coordinate WorldCoor(ndimension);
GetShmDims(WorldDims,ShmDims);
SHM=ShmDims;
////////////////////////////////////////////////////////////////
// Establish torus of processes and nodes with sub-blockings
////////////////////////////////////////////////////////////////

View File

@ -48,10 +48,9 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
_ShmSetup=1;
}
void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &SHM)
void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_MPI_Comm & optimal_comm)
{
optimal_comm = WorldComm;
SHM = Coordinate(processors.size(),1);
}
////////////////////////////////////////////////////////////////////////////////////////////

File diff suppressed because it is too large Load Diff

View File

@ -129,7 +129,7 @@ public:
auto exprCopy = expr;
ExpressionViewOpen(exprCopy);
auto me = View(AcceleratorWriteDiscard);
auto me = View(AcceleratorWrite);
accelerator_for(ss,me.size(),vobj::Nsimd(),{
auto tmp = eval(ss,exprCopy);
coalescedWrite(me[ss],tmp);
@ -152,7 +152,7 @@ public:
auto exprCopy = expr;
ExpressionViewOpen(exprCopy);
auto me = View(AcceleratorWriteDiscard);
auto me = View(AcceleratorWrite);
accelerator_for(ss,me.size(),vobj::Nsimd(),{
auto tmp = eval(ss,exprCopy);
coalescedWrite(me[ss],tmp);
@ -174,7 +174,7 @@ public:
this->checkerboard=cb;
auto exprCopy = expr;
ExpressionViewOpen(exprCopy);
auto me = View(AcceleratorWriteDiscard);
auto me = View(AcceleratorWrite);
accelerator_for(ss,me.size(),vobj::Nsimd(),{
auto tmp = eval(ss,exprCopy);
coalescedWrite(me[ss],tmp);
@ -245,7 +245,7 @@ public:
///////////////////////////////////////////
// user defined constructor
///////////////////////////////////////////
Lattice(GridBase *grid,ViewMode mode=AcceleratorWriteDiscard) {
Lattice(GridBase *grid,ViewMode mode=AcceleratorWrite) {
this->_grid = grid;
resize(this->_grid->oSites());
assert((((uint64_t)&this->_odata[0])&0xF) ==0);
@ -288,7 +288,7 @@ public:
typename std::enable_if<!std::is_same<robj,vobj>::value,int>::type i=0;
conformable(*this,r);
this->checkerboard = r.Checkerboard();
auto me = View(AcceleratorWriteDiscard);
auto me = View(AcceleratorWrite);
auto him= r.View(AcceleratorRead);
accelerator_for(ss,me.size(),vobj::Nsimd(),{
coalescedWrite(me[ss],him(ss));
@ -303,7 +303,7 @@ public:
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
this->checkerboard = r.Checkerboard();
conformable(*this,r);
auto me = View(AcceleratorWriteDiscard);
auto me = View(AcceleratorWrite);
auto him= r.View(AcceleratorRead);
accelerator_for(ss,me.size(),vobj::Nsimd(),{
coalescedWrite(me[ss],him(ss));

View File

@ -125,12 +125,6 @@ void pokeSite(const sobj &s,Lattice<vobj> &l,const Coordinate &site){
//////////////////////////////////////////////////////////
// Peek a scalar object from the SIMD array
//////////////////////////////////////////////////////////
template<class vobj>
typename vobj::scalar_object peekSite(const Lattice<vobj> &l,const Coordinate &site){
typename vobj::scalar_object s;
peekSite(s,l,site);
return s;
}
template<class vobj,class sobj>
void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){

View File

@ -28,6 +28,9 @@ Author: Christoph Lehner <christoph@lhnr.de>
#if defined(GRID_CUDA)||defined(GRID_HIP)
#include <Grid/lattice/Lattice_reduction_gpu.h>
#endif
#if defined(GRID_SYCL)
#include <Grid/lattice/Lattice_reduction_sycl.h>
#endif
NAMESPACE_BEGIN(Grid);
@ -127,7 +130,7 @@ inline Double max(const Double *arg, Integer osites)
template<class vobj>
inline typename vobj::scalar_object sum(const vobj *arg, Integer osites)
{
#if defined(GRID_CUDA)||defined(GRID_HIP)
#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
return sum_gpu(arg,osites);
#else
return sum_cpu(arg,osites);
@ -136,7 +139,7 @@ inline typename vobj::scalar_object sum(const vobj *arg, Integer osites)
template<class vobj>
inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites)
{
#if defined(GRID_CUDA)||defined(GRID_HIP)
#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
return sumD_gpu(arg,osites);
#else
return sumD_cpu(arg,osites);
@ -145,7 +148,7 @@ inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites)
template<class vobj>
inline typename vobj::scalar_objectD sumD_large(const vobj *arg, Integer osites)
{
#if defined(GRID_CUDA)||defined(GRID_HIP)
#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
return sumD_gpu_large(arg,osites);
#else
return sumD_cpu(arg,osites);
@ -155,13 +158,13 @@ inline typename vobj::scalar_objectD sumD_large(const vobj *arg, Integer osites)
template<class vobj>
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
{
#if defined(GRID_CUDA)||defined(GRID_HIP)
autoView( arg_v, arg, AcceleratorRead);
Integer osites = arg.Grid()->oSites();
auto ssum= sum_gpu(&arg_v[0],osites);
#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
typename vobj::scalar_object ssum;
autoView( arg_v, arg, AcceleratorRead);
ssum= sum_gpu(&arg_v[0],osites);
#else
autoView(arg_v, arg, CpuRead);
Integer osites = arg.Grid()->oSites();
auto ssum= sum_cpu(&arg_v[0],osites);
#endif
arg.Grid()->GlobalSum(ssum);
@ -171,7 +174,7 @@ inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
template<class vobj>
inline typename vobj::scalar_object sum_large(const Lattice<vobj> &arg)
{
#if defined(GRID_CUDA)||defined(GRID_HIP)
#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
autoView( arg_v, arg, AcceleratorRead);
Integer osites = arg.Grid()->oSites();
auto ssum= sum_gpu_large(&arg_v[0],osites);
@ -232,41 +235,23 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
const uint64_t sites = grid->oSites();
// Might make all code paths go this way.
#if 0
typedef decltype(innerProductD(vobj(),vobj())) inner_t;
Vector<inner_t> inner_tmp(sites);
auto inner_tmp_v = &inner_tmp[0];
{
autoView( left_v , left, AcceleratorRead);
autoView( right_v,right, AcceleratorRead);
// This code could read coalesce
// GPU - SIMT lane compliance...
accelerator_for( ss, sites, nsimd,{
auto x_l = left_v(ss);
auto y_l = right_v(ss);
coalescedWrite(inner_tmp_v[ss],innerProductD(x_l,y_l));
accelerator_for( ss, sites, 1,{
auto x_l = left_v[ss];
auto y_l = right_v[ss];
inner_tmp_v[ss]=innerProductD(x_l,y_l);
});
}
#else
typedef decltype(innerProduct(vobj(),vobj())) inner_t;
Vector<inner_t> inner_tmp(sites);
auto inner_tmp_v = &inner_tmp[0];
{
autoView( left_v , left, AcceleratorRead);
autoView( right_v,right, AcceleratorRead);
// GPU - SIMT lane compliance...
accelerator_for( ss, sites, nsimd,{
auto x_l = left_v(ss);
auto y_l = right_v(ss);
coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l));
});
}
#endif
// This is in single precision and fails some tests
auto anrm = sumD(inner_tmp_v,sites);
auto anrm = sum(inner_tmp_v,sites);
nrm = anrm;
return nrm;
}
@ -300,7 +285,7 @@ axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Latt
conformable(x,y);
typedef typename vobj::scalar_type scalar_type;
// typedef typename vobj::vector_typeD vector_type;
typedef typename vobj::vector_typeD vector_type;
RealD nrm;
GridBase *grid = x.Grid();
@ -312,29 +297,17 @@ axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Latt
autoView( x_v, x, AcceleratorRead);
autoView( y_v, y, AcceleratorRead);
autoView( z_v, z, AcceleratorWrite);
#if 0
typedef decltype(innerProductD(x_v[0],y_v[0])) inner_t;
Vector<inner_t> inner_tmp(sites);
auto inner_tmp_v = &inner_tmp[0];
accelerator_for( ss, sites, nsimd,{
auto tmp = a*x_v(ss)+b*y_v(ss);
coalescedWrite(inner_tmp_v[ss],innerProductD(tmp,tmp));
coalescedWrite(z_v[ss],tmp);
accelerator_for( ss, sites, 1,{
auto tmp = a*x_v[ss]+b*y_v[ss];
inner_tmp_v[ss]=innerProductD(tmp,tmp);
z_v[ss]=tmp;
});
nrm = real(TensorRemove(sum(inner_tmp_v,sites)));
#else
typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t;
Vector<inner_t> inner_tmp(sites);
auto inner_tmp_v = &inner_tmp[0];
accelerator_for( ss, sites, nsimd,{
auto tmp = a*x_v(ss)+b*y_v(ss);
coalescedWrite(inner_tmp_v[ss],innerProduct(tmp,tmp));
coalescedWrite(z_v[ss],tmp);
});
nrm = real(TensorRemove(sumD(inner_tmp_v,sites)));
#endif
grid->GlobalSum(nrm);
return nrm;
}

View File

@ -0,0 +1,125 @@
NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////////////////////////////////////////////////////
// Possibly promote to double and sum
/////////////////////////////////////////////////////////////////////////////////////////////////////////
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer osites)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_objectD sobjD;
sobj *mysum =(sobj *) malloc_shared(sizeof(sobj),*theGridAccelerator);
sobj identity; zeroit(identity);
sobj ret ;
Integer nsimd= vobj::Nsimd();
theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
auto Reduction = cl::sycl::reduction(mysum,identity,std::plus<>());
cgh.parallel_for(cl::sycl::range<1>{osites},
Reduction,
[=] (cl::sycl::id<1> item, auto &sum) {
auto osite = item[0];
sum +=Reduce(lat[osite]);
});
});
theGridAccelerator->wait();
ret = mysum[0];
free(mysum,*theGridAccelerator);
sobjD dret; convertType(dret,ret);
return dret;
}
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu_large(const vobj *lat, Integer osites)
{
return sumD_gpu_tensor(lat,osites);
}
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu_small(const vobj *lat, Integer osites)
{
return sumD_gpu_large(lat,osites);
}
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites)
{
return sumD_gpu_large(lat,osites);
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////
// Return as same precision as input performing reduction in double precision though
/////////////////////////////////////////////////////////////////////////////////////////////////////////
template <class vobj>
inline typename vobj::scalar_object sum_gpu(const vobj *lat, Integer osites)
{
typedef typename vobj::scalar_object sobj;
sobj result;
result = sumD_gpu(lat,osites);
return result;
}
template <class vobj>
inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osites)
{
typedef typename vobj::scalar_object sobj;
sobj result;
result = sumD_gpu_large(lat,osites);
return result;
}
NAMESPACE_END(Grid);
/*
template<class Double> Double svm_reduce(Double *vec,uint64_t L)
{
Double sumResult; zeroit(sumResult);
Double *d_sum =(Double *)cl::sycl::malloc_shared(sizeof(Double),*theGridAccelerator);
Double identity; zeroit(identity);
theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
auto Reduction = cl::sycl::reduction(d_sum,identity,std::plus<>());
cgh.parallel_for(cl::sycl::range<1>{L},
Reduction,
[=] (cl::sycl::id<1> index, auto &sum) {
sum +=vec[index];
});
});
theGridAccelerator->wait();
Double ret = d_sum[0];
free(d_sum,*theGridAccelerator);
std::cout << " svm_reduce finished "<<L<<" sites sum = " << ret <<std::endl;
return ret;
}
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu_repack(const vobj *lat, Integer osites)
{
typedef typename vobj::vector_type vector;
typedef typename vobj::scalar_type scalar;
typedef typename vobj::scalar_typeD scalarD;
typedef typename vobj::scalar_objectD sobjD;
sobjD ret;
scalarD *ret_p = (scalarD *)&ret;
const int nsimd = vobj::Nsimd();
const int words = sizeof(vobj)/sizeof(vector);
Vector<scalar> buffer(osites*nsimd);
scalar *buf = &buffer[0];
vector *dat = (vector *)lat;
for(int w=0;w<words;w++) {
accelerator_for(ss,osites,nsimd,{
int lane = acceleratorSIMTlane(nsimd);
buf[ss*nsimd+lane] = dat[ss*words+w].getlane(lane);
});
//Precision change at this point is to late to gain precision
ret_p[w] = svm_reduce(buf,nsimd*osites);
}
return ret;
}
*/

View File

@ -424,32 +424,9 @@ public:
// MT implementation does not implement fast discard even though
// in principle this is possible
////////////////////////////////////////////////
#if 1
thread_for( lidx, _grid->lSites(), {
int gidx;
int o_idx;
int i_idx;
int rank;
Coordinate pcoor;
Coordinate lcoor;
Coordinate gcoor;
_grid->LocalIndexToLocalCoor(lidx,lcoor);
pcoor=_grid->ThisProcessorCoor();
_grid->ProcessorCoorLocalCoorToGlobalCoor(pcoor,lcoor,gcoor);
_grid->GlobalCoorToGlobalIndex(gcoor,gidx);
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
assert(rank == _grid->ThisRank() );
int l_idx=generator_idx(o_idx,i_idx);
_generators[l_idx] = master_engine;
Skip(_generators[l_idx],gidx); // Skip to next RNG sequence
});
#else
// Everybody loops over global volume.
thread_for( gidx, _grid->_gsites, {
// Where is it?
int rank;
int o_idx;
@ -466,7 +443,6 @@ public:
Skip(_generators[l_idx],gidx); // Skip to next RNG sequence
}
});
#endif
#else
////////////////////////////////////////////////////////////////
// Machine and thread decomposition dependent seeding is efficient

View File

@ -0,0 +1,126 @@
NAMESPACE_BEGIN(Grid);
// If NOT CUDA or HIP -- we should provide
// -- atomicAdd(float *,float)
// -- atomicAdd(double *,double)
//
// Augment CUDA with complex atomics
#if !defined(GRID_HIP) || !defined(GRID_CUDA)
inline void atomicAdd(float *acc,float elem)
{
*acc += elem;
}
inline void atomicAdd(double *acc,double elem)
{
*acc += elem;
}
#endif
inline void atomicAdd(ComplexD *accum,ComplexD & elem)
{
double *a_p = (double *)accum;
double *e_p = (double *)&elem;
for(int w=0;w<2;w++){
atomicAdd(&a_p[w],e_p[w]);
}
}
inline void atomicAdd(ComplexF *accum,ComplexF & elem)
{
float *a_p = (float *)accum;
float *e_p = (float *)&elem;
for(int w=0;w<2;w++){
atomicAdd(&a_p[w],e_p[w]);
}
}
// Augment CUDA with vobj atomics
template<class vobj> accelerator_inline void atomicAdd(vobj *accum, vobj & elem)
{
typedef typename vobj::scalar_type scalar_type;
scalar_type *a_p= (scalar_type *)accum;
scalar_type *e_p= (scalar_type *)& elem;
for(int w=0;w<vobj::Nsimd();w++){
atomicAdd(&a_p[w],e_p[w]);
}
}
// Atomics based slice sum
template<class vobj> inline void sliceSumGpu(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_object::scalar_type scalar_type;
GridBase *grid = Data.Grid();
assert(grid!=NULL);
const int Nd = grid->_ndimension;
const int Nsimd = grid->Nsimd();
assert(orthogdim >= 0);
assert(orthogdim < Nd);
int fd=grid->_fdimensions[orthogdim];
int ld=grid->_ldimensions[orthogdim];
int rd=grid->_rdimensions[orthogdim];
// Move to device memory and copy in / out
Vector<vobj> lvSum(rd); // will locally sum vectors first
Vector<sobj> lsSum(ld,Zero()); // sum across these down to scalars
ExtractBuffer<sobj> extracted(Nsimd); // splitting the SIMD
result.resize(fd); // And then global sum to return the same vector to every node
for(int r=0;r<rd;r++){
lvSum[r]=Zero();
}
int e1= grid->_slice_nblock[orthogdim];
int e2= grid->_slice_block [orthogdim];
int stride=grid->_slice_stride[orthogdim];
// sum over reduced dimension planes, breaking out orthog dir
// Parallel over orthog direction
autoView( Data_v, Data, AcceleratorRead);
auto lvSum_p=&lvSum[0];
int ostride = grid->_ostride[orthogdim];
accelerator_for( ree,rd*e1*e2,1, {
int b = ree%e2;
int re= ree/e2;
int n=re%e1;
int r=re/e1;
int so=r*ostride;
int ss=so+n*stride+b;
atomicAdd(&lvSum_p[r],Data_v[ss]);
});
// Sum across simd lanes in the plane, breaking out orthog dir.
Coordinate icoor(Nd);
for(int rt=0;rt<rd;rt++){
extract(lvSum[rt],extracted);
for(int idx=0;idx<Nsimd;idx++){
grid->iCoorFromIindex(icoor,idx);
int ldx =rt+icoor[orthogdim]*rd;
lsSum[ldx]=lsSum[ldx]+extracted[idx];
}
}
// sum over nodes.
for(int t=0;t<fd;t++){
int pt = t/ld; // processor plane
int lt = t%ld;
if ( pt == grid->_processor_coor[orthogdim] ) {
result[t]=lsSum[lt];
} else {
result[t]=Zero();
}
}
scalar_type * ptr = (scalar_type *) &result[0];
int words = fd*sizeof(sobj)/sizeof(scalar_type);
grid->GlobalSumVector(ptr, words);
}
NAMESPACE_END(Grid);

View File

@ -855,7 +855,7 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int
template<class vobj>
void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine)
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
{
typedef typename vobj::scalar_object sobj;

View File

@ -66,8 +66,10 @@ GridLogger GridLogError (1, "Error" , GridLogColours, "RED");
GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW");
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL");
GridLogger GridLogMemory (1, "Memory", GridLogColours, "NORMAL");
GridLogger GridLogTracing(1, "Tracing", GridLogColours, "NORMAL");
GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE");
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
GridLogger GridLogDslash (1, "Dslash", GridLogColours, "BLUE");
GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE");
GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE");
GridLogger GridLogHMC (1, "HMC", GridLogColours, "BLUE");
@ -76,23 +78,27 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
GridLogError.Active(1);
GridLogWarning.Active(0);
GridLogMessage.Active(1); // at least the messages should be always on
GridLogMemory.Active(0); // at least the messages should be always on
GridLogMemory.Active(0);
GridLogTracing.Active(0);
GridLogIterative.Active(0);
GridLogDebug.Active(0);
GridLogPerformance.Active(0);
GridLogDslash.Active(0);
GridLogIntegrator.Active(1);
GridLogColours.Active(0);
GridLogHMC.Active(1);
for (int i = 0; i < logstreams.size(); i++) {
if (logstreams[i] == std::string("Tracing")) GridLogTracing.Active(1);
if (logstreams[i] == std::string("Memory")) GridLogMemory.Active(1);
if (logstreams[i] == std::string("Warning")) GridLogWarning.Active(1);
if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0);
if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1);
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1);
if (logstreams[i] == std::string("NoIntegrator")) GridLogIntegrator.Active(0);
if (logstreams[i] == std::string("NoHMC")) GridLogHMC.Active(0);
if (logstreams[i] == std::string("Dslash")) GridLogDslash.Active(1);
if (logstreams[i] == std::string("NoIntegrator"))GridLogIntegrator.Active(0);
if (logstreams[i] == std::string("NoHMC")) GridLogHMC.Active(0);
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);
}
}

View File

@ -138,7 +138,8 @@ public:
stream << std::setw(log.topWidth);
}
stream << log.topName << log.background()<< " : ";
stream << log.colour() << std::left;
// stream << log.colour() << std::left;
stream << std::left;
if (log.chanWidth > 0)
{
stream << std::setw(log.chanWidth);
@ -153,9 +154,9 @@ public:
stream << log.evidence()
<< now << log.background() << " : " ;
}
stream << log.colour();
// stream << log.colour();
stream << std::right;
stream.flags(f);
return stream;
} else {
return devnull;
@ -180,10 +181,12 @@ extern GridLogger GridLogWarning;
extern GridLogger GridLogMessage;
extern GridLogger GridLogDebug ;
extern GridLogger GridLogPerformance;
extern GridLogger GridLogDslash;
extern GridLogger GridLogIterative ;
extern GridLogger GridLogIntegrator ;
extern GridLogger GridLogHMC;
extern GridLogger GridLogMemory;
extern GridLogger GridLogTracing;
extern Colours GridLogColours;
std::string demangle(const char* name) ;

View File

@ -42,10 +42,8 @@ using namespace Grid;
////////////////////////////////////////////////////////////////////////////////
class NerscIO : public BinaryIO {
public:
typedef Lattice<vLorentzColourMatrixD> GaugeField;
// Enable/disable exiting if the plaquette in the header does not match the value computed (default true)
static bool & exitOnReadPlaquetteMismatch(){ static bool v=true; return v; }
typedef Lattice<vLorentzColourMatrixD> GaugeField;
static inline void truncate(std::string file){
std::ofstream fout(file,std::ios::out);
@ -205,7 +203,7 @@ public:
std::cerr << " nersc_csum " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl;
exit(0);
}
if(exitOnReadPlaquetteMismatch()) assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
assert(nersc_csum == header.checksum );

View File

@ -27,10 +27,13 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */
#include <Grid/GridCore.h>
#include <Grid/perfmon/PerfCount.h>
#include <Grid/perfmon/Timer.h>
#include <Grid/perfmon/PerfCount.h>
NAMESPACE_BEGIN(Grid);
GridTimePoint theProgramStart = GridClock::now();
#define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16))
#define RawConfig(A,B) (A<<8|B)
const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = {

View File

@ -30,6 +30,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_PERFCOUNT_H
#define GRID_PERFCOUNT_H
#ifndef __SSC_START
#define __SSC_START
#define __SSC_STOP
#endif
#include <sys/time.h>
#include <ctime>
#include <chrono>

View File

@ -35,17 +35,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid)
// Dress the output; use std::chrono
// C++11 time facilities better?
inline double usecond(void) {
struct timeval tv;
tv.tv_sec = 0;
tv.tv_usec = 0;
gettimeofday(&tv,NULL);
return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
}
typedef std::chrono::system_clock GridClock;
//typedef std::chrono::system_clock GridClock;
typedef std::chrono::high_resolution_clock GridClock;
typedef std::chrono::time_point<GridClock> GridTimePoint;
typedef std::chrono::seconds GridSecs;
@ -53,6 +44,15 @@ typedef std::chrono::milliseconds GridMillisecs;
typedef std::chrono::microseconds GridUsecs;
typedef std::chrono::microseconds GridTime;
extern GridTimePoint theProgramStart;
// Dress the output; use std::chrono
// C++11 time facilities better?
inline double usecond(void) {
auto usecs = std::chrono::duration_cast<GridUsecs>(GridClock::now()-theProgramStart);
return 1.0*usecs.count();
}
inline std::ostream& operator<< (std::ostream & stream, const GridSecs & time)
{
stream << time.count()<<" s";

70
Grid/perfmon/Tracing.h Normal file
View File

@ -0,0 +1,70 @@
#pragma once
NAMESPACE_BEGIN(Grid);
#ifdef GRID_TRACING_NVTX
#include <nvToolsExt.h>
class GridTracer {
public:
GridTracer(const char* name) {
nvtxRangePushA(name);
}
~GridTracer() {
nvtxRangePop();
}
};
inline void tracePush(const char *name) { nvtxRangePushA(name); }
inline void tracePop(const char *name) { nvtxRangePop(); }
inline int traceStart(const char *name) { }
inline void traceStop(int ID) { }
#endif
#ifdef GRID_TRACING_ROCTX
#include <roctracer/roctx.h>
class GridTracer {
public:
GridTracer(const char* name) {
roctxRangePushA(name);
std::cout << "roctxRangePush "<<name<<std::endl;
}
~GridTracer() {
roctxRangePop();
std::cout << "roctxRangePop "<<std::endl;
}
};
inline void tracePush(const char *name) { roctxRangePushA(name); }
inline void tracePop(const char *name) { roctxRangePop(); }
inline int traceStart(const char *name) { roctxRangeStart(name); }
inline void traceStop(int ID) { roctxRangeStop(ID); }
#endif
#ifdef GRID_TRACING_TIMER
class GridTracer {
public:
const char *name;
double elapsed;
GridTracer(const char* _name) {
name = _name;
elapsed=-usecond();
}
~GridTracer() {
elapsed+=usecond();
std::cout << GridLogTracing << name << " took " <<elapsed<< " us" <<std::endl;
}
};
inline void tracePush(const char *name) { }
inline void tracePop(const char *name) { }
inline int traceStart(const char *name) { return 0; }
inline void traceStop(int ID) { }
#endif
#ifdef GRID_TRACING_NONE
#define GRID_TRACE(name)
inline void tracePush(const char *name) { }
inline void tracePop(const char *name) { }
inline int traceStart(const char *name) { return 0; }
inline void traceStop(int ID) { }
#else
#define GRID_TRACE(name) GridTracer uniq_name_using_macros##__COUNTER__(name);
#endif
NAMESPACE_END(Grid);

View File

@ -63,7 +63,6 @@ static constexpr int Ngp=2; // gparity index range
#define ColourIndex (2)
#define SpinIndex (1)
#define LorentzIndex (0)
#define GparityFlavourIndex (0)
// Also should make these a named enum type
static constexpr int DaggerNo=0;
@ -88,8 +87,6 @@ template<typename T> struct isCoarsened {
template <typename T> using IfCoarsened = Invoke<std::enable_if< isCoarsened<T>::value,int> > ;
template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ;
const int GparityFlavourTensorIndex = 3; //TensorLevel counts from the bottom!
// ChrisK very keen to add extra space for Gparity doubling.
//
// Also add domain wall index, in a way where Wilson operator
@ -113,10 +110,8 @@ template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVec
template<typename vtype> using iSpinColourSpinColourMatrix = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >;
template<typename vtype> using iGparityFlavourVector = iVector<iScalar<iScalar<vtype> >, Ngp>;
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >;
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
template<typename vtype> using iGparityFlavourMatrix = iMatrix<iScalar<iScalar<vtype> >, Ngp>;
// Spin matrix
typedef iSpinMatrix<Complex > SpinMatrix;
@ -181,16 +176,6 @@ typedef iDoubleStoredColourMatrix<vComplex > vDoubleStoredColourMatrix;
typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF;
typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD;
//G-parity flavour matrix
typedef iGparityFlavourMatrix<Complex> GparityFlavourMatrix;
typedef iGparityFlavourMatrix<ComplexF> GparityFlavourMatrixF;
typedef iGparityFlavourMatrix<ComplexD> GparityFlavourMatrixD;
typedef iGparityFlavourMatrix<vComplex> vGparityFlavourMatrix;
typedef iGparityFlavourMatrix<vComplexF> vGparityFlavourMatrixF;
typedef iGparityFlavourMatrix<vComplexD> vGparityFlavourMatrixD;
// Spin vector
typedef iSpinVector<Complex > SpinVector;
typedef iSpinVector<ComplexF> SpinVectorF;
@ -235,16 +220,6 @@ typedef iHalfSpinColourVector<ComplexD> HalfSpinColourVectorD;
typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector;
typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF;
typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
//G-parity flavour vector
typedef iGparityFlavourVector<Complex > GparityFlavourVector;
typedef iGparityFlavourVector<ComplexF> GparityFlavourVectorF;
typedef iGparityFlavourVector<ComplexD> GparityFlavourVectorD;
typedef iGparityFlavourVector<vComplex > vGparityFlavourVector;
typedef iGparityFlavourVector<vComplexF> vGparityFlavourVectorF;
typedef iGparityFlavourVector<vComplexD> vGparityFlavourVectorD;
// singlets
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
@ -476,9 +451,20 @@ template<class vobj> void pokeLorentz(vobj &lhs,const decltype(peekIndex<Lorentz
// Fermion <-> propagator assignements
//////////////////////////////////////////////
//template <class Prop, class Ferm>
#define FAST_FERM_TO_PROP
template <class Fimpl>
void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::FermionField &f, const int s, const int c)
{
#ifdef FAST_FERM_TO_PROP
autoView(p_v,p,CpuWrite);
autoView(f_v,f,CpuRead);
thread_for(idx,p_v.oSites(),{
for(int ss = 0; ss < Ns; ++ss) {
for(int cc = 0; cc < Fimpl::Dimension; ++cc) {
p_v[idx]()(ss,s)(cc,c) = f_v[idx]()(ss)(cc); // Propagator sink index is LEFT, suitable for left mult by gauge link (e.g.)
}}
});
#else
for(int j = 0; j < Ns; ++j)
{
auto pjs = peekSpin(p, j, s);
@ -490,12 +476,23 @@ void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::Fermio
}
pokeSpin(p, pjs, j, s);
}
#endif
}
//template <class Prop, class Ferm>
template <class Fimpl>
void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::PropagatorField &p, const int s, const int c)
{
#ifdef FAST_FERM_TO_PROP
autoView(p_v,p,CpuRead);
autoView(f_v,f,CpuWrite);
thread_for(idx,p_v.oSites(),{
for(int ss = 0; ss < Ns; ++ss) {
for(int cc = 0; cc < Fimpl::Dimension; ++cc) {
f_v[idx]()(ss)(cc) = p_v[idx]()(ss,s)(cc,c); // LEFT index is copied across for s,c right index
}}
});
#else
for(int j = 0; j < Ns; ++j)
{
auto pjs = peekSpin(p, j, s);
@ -507,6 +504,7 @@ void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::Propagato
}
pokeSpin(f, fj, j);
}
#endif
}
//////////////////////////////////////////////

View File

@ -40,29 +40,6 @@ class Action
public:
bool is_smeared = false;
RealD deriv_norm_sum;
RealD deriv_max_sum;
int deriv_num;
RealD deriv_us;
RealD S_us;
RealD refresh_us;
void reset_timer(void) {
deriv_us = S_us = refresh_us = 0.0;
deriv_num=0;
deriv_norm_sum = deriv_max_sum=0.0;
}
void deriv_log(RealD nrm, RealD max) { deriv_max_sum+=max; deriv_norm_sum+=nrm; deriv_num++;}
RealD deriv_max_average(void) { return deriv_max_sum/deriv_num; };
RealD deriv_norm_average(void) { return deriv_norm_sum/deriv_num; };
RealD deriv_timer(void) { return deriv_us; };
RealD S_timer(void) { return deriv_us; };
RealD refresh_timer(void) { return deriv_us; };
void deriv_timer_start(void) { deriv_us-=usecond(); }
void deriv_timer_stop(void) { deriv_us+=usecond(); }
void refresh_timer_start(void) { refresh_us-=usecond(); }
void refresh_timer_stop(void) { refresh_us+=usecond(); }
void S_timer_start(void) { S_us-=usecond(); }
void S_timer_stop(void) { S_us+=usecond(); }
// Heatbath?
virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) = 0; // refresh pseudofermions
virtual RealD S(const GaugeField& U) = 0; // evaluate the action

View File

@ -37,10 +37,6 @@ NAMESPACE_CHECK(ActionSet);
#include <Grid/qcd/action/ActionParams.h>
NAMESPACE_CHECK(ActionParams);
#include <Grid/qcd/action/filters/MomentumFilter.h>
#include <Grid/qcd/action/filters/DirichletFilter.h>
#include <Grid/qcd/action/filters/DDHMCFilter.h>
////////////////////////////////////////////
// Gauge Actions
////////////////////////////////////////////

View File

@ -37,33 +37,24 @@ NAMESPACE_BEGIN(Grid);
// These can move into a params header and be given MacroMagic serialisation
struct GparityWilsonImplParams {
Coordinate twists;
//mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
Coordinate dirichlet; // Blocksize of dirichlet BCs
GparityWilsonImplParams() : twists(Nd, 0), dirichlet(Nd, 0) {};
GparityWilsonImplParams() : twists(Nd, 0) {};
};
struct WilsonImplParams {
bool overlapCommsCompute;
Coordinate dirichlet; // Blocksize of dirichlet BCs
AcceleratorVector<Real,Nd> twist_n_2pi_L;
AcceleratorVector<Complex,Nd> boundary_phases;
WilsonImplParams() {
dirichlet.resize(Nd,0);
boundary_phases.resize(Nd, 1.0);
twist_n_2pi_L.resize(Nd, 0.0);
};
WilsonImplParams(const AcceleratorVector<Complex,Nd> phi) : boundary_phases(phi), overlapCommsCompute(false) {
twist_n_2pi_L.resize(Nd, 0.0);
dirichlet.resize(Nd,0);
}
};
struct StaggeredImplParams {
Coordinate dirichlet; // Blocksize of dirichlet BCs
StaggeredImplParams()
{
dirichlet.resize(Nd,0);
};
StaggeredImplParams() {};
};
struct OneFlavourRationalParams : Serializable {
@ -72,11 +63,9 @@ struct StaggeredImplParams {
RealD, hi,
int, MaxIter,
RealD, tolerance,
RealD, mdtolerance,
int, degree,
int, precision,
int, BoundsCheckFreq,
RealD, BoundsCheckTol);
int, BoundsCheckFreq);
// MaxIter and tolerance, vectors??
@ -87,62 +76,16 @@ struct StaggeredImplParams {
RealD tol = 1.0e-8,
int _degree = 10,
int _precision = 64,
int _BoundsCheckFreq=20,
RealD mdtol = 1.0e-6,
double _BoundsCheckTol=1e-6)
int _BoundsCheckFreq=20)
: lo(_lo),
hi(_hi),
MaxIter(_maxit),
tolerance(tol),
mdtolerance(mdtol),
degree(_degree),
precision(_precision),
BoundsCheckFreq(_BoundsCheckFreq),
BoundsCheckTol(_BoundsCheckTol){};
BoundsCheckFreq(_BoundsCheckFreq){};
};
/*Action parameters for the generalized rational action
The approximation is for (M^dag M)^{1/inv_pow}
where inv_pow is the denominator of the fractional power.
Default inv_pow=2 for square root, making this equivalent to
the OneFlavourRational action
*/
struct RationalActionParams : Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(RationalActionParams,
int, inv_pow,
RealD, lo, //low eigenvalue bound of rational approx
RealD, hi, //high eigenvalue bound of rational approx
int, MaxIter, //maximum iterations in msCG
RealD, action_tolerance, //msCG tolerance in action evaluation
int, action_degree, //rational approx tolerance in action evaluation
RealD, md_tolerance, //msCG tolerance in MD integration
int, md_degree, //rational approx tolerance in MD integration
int, precision, //precision of floating point arithmetic
int, BoundsCheckFreq); //frequency the approximation is tested (with Metropolis degree/tolerance); 0 disables the check
// constructor
RationalActionParams(int _inv_pow = 2,
RealD _lo = 0.0,
RealD _hi = 1.0,
int _maxit = 1000,
RealD _action_tolerance = 1.0e-8,
int _action_degree = 10,
RealD _md_tolerance = 1.0e-8,
int _md_degree = 10,
int _precision = 64,
int _BoundsCheckFreq=20)
: inv_pow(_inv_pow),
lo(_lo),
hi(_hi),
MaxIter(_maxit),
action_tolerance(_action_tolerance),
action_degree(_action_degree),
md_tolerance(_md_tolerance),
md_degree(_md_degree),
precision(_precision),
BoundsCheckFreq(_BoundsCheckFreq){};
};
NAMESPACE_END(Grid);
#endif

View File

@ -71,7 +71,6 @@ public:
RealD Mass(void) { return (mass_plus + mass_minus) / 2.0; };
RealD MassPlus(void) { return mass_plus; };
RealD MassMinus(void) { return mass_minus; };
void SetMass(RealD _mass) {
mass_plus=mass_minus=_mass;
SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs

View File

@ -204,15 +204,18 @@ public:
typedef WilsonCloverHelpers<Impl> Helpers;
typedef CompactWilsonCloverHelpers<Impl> CompactHelpers;
static void MassTerm(CloverField& Clover, RealD diag_mass) {
static void InstantiateClover(CloverField& Clover, CloverField& CloverInv, RealD csw_t, RealD diag_mass) {
Clover += diag_mass;
}
static void Exponentiate_Clover(CloverDiagonalField& Diagonal,
CloverTriangleField& Triangle,
RealD csw_t, RealD diag_mass) {
static void InvertClover(CloverField& InvClover,
const CloverDiagonalField& diagonal,
const CloverTriangleField& triangle,
CloverDiagonalField& diagonalInv,
CloverTriangleField& triangleInv,
bool fixedBoundaries) {
// Do nothing
CompactHelpers::Invert(diagonal, triangle, diagonalInv, triangleInv);
}
// TODO: implement Cmunu for better performances with compact layout, but don't do it
@ -237,9 +240,17 @@ public:
template <typename vtype> using iImplClover = iScalar<iMatrix<iMatrix<vtype, Impl::Dimension>, Ns>>;
typedef CompactWilsonCloverHelpers<Impl> CompactHelpers;
static void MassTerm(CloverField& Clover, RealD diag_mass) {
// do nothing!
// mass term is multiplied to exp(Clover) below
// Can this be avoided?
static void IdentityTimesC(const CloverField& in, RealD c) {
int DimRep = Impl::Dimension;
autoView(in_v, in, AcceleratorWrite);
accelerator_for(ss, in.Grid()->oSites(), 1, {
for (int sa=0; sa<Ns; sa++)
for (int ca=0; ca<DimRep; ca++)
in_v[ss]()(sa,sa)(ca,ca) = c;
});
}
static int getNMAX(RealD prec, RealD R) {
@ -254,175 +265,62 @@ public:
return NMAX;
}
static int getNMAX(Lattice<iImplCloverDiagonal<vComplexD>> &t, RealD R) {return getNMAX(1e-12,R);}
static int getNMAX(Lattice<iImplCloverDiagonal<vComplexF>> &t, RealD R) {return getNMAX(1e-6,R);}
static int getNMAX(Lattice<iImplClover<vComplexD>> &t, RealD R) {return getNMAX(1e-12,R);}
static int getNMAX(Lattice<iImplClover<vComplexF>> &t, RealD R) {return getNMAX(1e-6,R);}
static void ExponentiateHermitean6by6(const iMatrix<ComplexD,6> &arg, const RealD& alpha, const std::vector<RealD>& cN, const int Niter, iMatrix<ComplexD,6>& dest){
static void InstantiateClover(CloverField& Clover, CloverField& CloverInv, RealD csw_t, RealD diag_mass) {
typedef iMatrix<ComplexD,6> mat;
GridBase* grid = Clover.Grid();
CloverField ExpClover(grid);
RealD qn[6];
RealD qnold[6];
RealD p[5];
RealD trA2, trA3, trA4;
int NMAX = getNMAX(Clover, 3.*csw_t/diag_mass);
mat A2, A3, A4, A5;
A2 = alpha * alpha * arg * arg;
A3 = alpha * arg * A2;
A4 = A2 * A2;
A5 = A2 * A3;
Clover *= (1.0/diag_mass);
trA2 = toReal( trace(A2) );
trA3 = toReal( trace(A3) );
trA4 = toReal( trace(A4));
p[0] = toReal( trace(A3 * A3)) / 6.0 - 0.125 * trA4 * trA2 - trA3 * trA3 / 18.0 + trA2 * trA2 * trA2/ 48.0;
p[1] = toReal( trace(A5)) / 5.0 - trA3 * trA2 / 6.0;
p[2] = toReal( trace(A4)) / 4.0 - 0.125 * trA2 * trA2;
p[3] = trA3 / 3.0;
p[4] = 0.5 * trA2;
qnold[0] = cN[Niter];
qnold[1] = 0.0;
qnold[2] = 0.0;
qnold[3] = 0.0;
qnold[4] = 0.0;
qnold[5] = 0.0;
for(int i = Niter-1; i >= 0; i--)
{
qn[0] = p[0] * qnold[5] + cN[i];
qn[1] = p[1] * qnold[5] + qnold[0];
qn[2] = p[2] * qnold[5] + qnold[1];
qn[3] = p[3] * qnold[5] + qnold[2];
qn[4] = p[4] * qnold[5] + qnold[3];
qn[5] = qnold[4];
qnold[0] = qn[0];
qnold[1] = qn[1];
qnold[2] = qn[2];
qnold[3] = qn[3];
qnold[4] = qn[4];
qnold[5] = qn[5];
}
mat unit(1.0);
dest = (qn[0] * unit + qn[1] * alpha * arg + qn[2] * A2 + qn[3] * A3 + qn[4] * A4 + qn[5] * A5);
}
static void Exponentiate_Clover(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, RealD csw_t, RealD diag_mass) {
GridBase* grid = Diagonal.Grid();
int NMAX = getNMAX(Diagonal, 3.*csw_t/diag_mass);
//
// Implementation completely in Daniel's layout
//
// Taylor expansion with Cayley-Hamilton recursion
// underlying Horner scheme as above
// Taylor expansion, slow but generic
// Horner scheme: a0 + a1 x + a2 x^2 + .. = a0 + x (a1 + x(...))
// qN = cN
// qn = cn + qn+1 X
std::vector<RealD> cn(NMAX+1);
cn[0] = 1.0;
for (int i=1; i<=NMAX; i++){
for (int i=1; i<=NMAX; i++)
cn[i] = cn[i-1] / RealD(i);
}
// Taken over from Daniel's implementation
conformable(Diagonal, Triangle);
ExpClover = Zero();
IdentityTimesC(ExpClover, cn[NMAX]);
for (int i=NMAX-1; i>=0; i--)
ExpClover = ExpClover * Clover + cn[i];
long lsites = grid->lSites();
{
typedef typename SiteCloverDiagonal::scalar_object scalar_object_diagonal;
typedef typename SiteCloverTriangle::scalar_object scalar_object_triangle;
typedef iMatrix<ComplexD,6> mat;
// prepare inverse
CloverInv = (-1.0)*Clover;
autoView(diagonal_v, Diagonal, CpuRead);
autoView(triangle_v, Triangle, CpuRead);
autoView(diagonalExp_v, Diagonal, CpuWrite);
autoView(triangleExp_v, Triangle, CpuWrite);
Clover = ExpClover * diag_mass;
thread_for(site, lsites, { // NOTE: Not on GPU because of (peek/poke)LocalSite
ExpClover = Zero();
IdentityTimesC(ExpClover, cn[NMAX]);
for (int i=NMAX-1; i>=0; i--)
ExpClover = ExpClover * CloverInv + cn[i];
mat srcCloverOpUL(0.0); // upper left block
mat srcCloverOpLR(0.0); // lower right block
mat ExpCloverOp;
CloverInv = ExpClover * (1.0/diag_mass);
scalar_object_diagonal diagonal_tmp = Zero();
scalar_object_diagonal diagonal_exp_tmp = Zero();
scalar_object_triangle triangle_tmp = Zero();
scalar_object_triangle triangle_exp_tmp = Zero();
Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor);
peekLocalSite(diagonal_tmp, diagonal_v, lcoor);
peekLocalSite(triangle_tmp, triangle_v, lcoor);
int block;
block = 0;
for(int i = 0; i < 6; i++){
for(int j = 0; j < 6; j++){
if (i == j){
srcCloverOpUL(i,j) = static_cast<ComplexD>(TensorRemove(diagonal_tmp()(block)(i)));
}
else{
srcCloverOpUL(i,j) = static_cast<ComplexD>(TensorRemove(CompactHelpers::triangle_elem(triangle_tmp, block, i, j)));
}
}
}
block = 1;
for(int i = 0; i < 6; i++){
for(int j = 0; j < 6; j++){
if (i == j){
srcCloverOpLR(i,j) = static_cast<ComplexD>(TensorRemove(diagonal_tmp()(block)(i)));
}
else{
srcCloverOpLR(i,j) = static_cast<ComplexD>(TensorRemove(CompactHelpers::triangle_elem(triangle_tmp, block, i, j)));
}
}
}
// exp(Clover)
ExponentiateHermitean6by6(srcCloverOpUL,1.0/diag_mass,cn,NMAX,ExpCloverOp);
block = 0;
for(int i = 0; i < 6; i++){
for(int j = 0; j < 6; j++){
if (i == j){
diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j);
}
else if(i < j){
triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j);
}
}
}
ExponentiateHermitean6by6(srcCloverOpLR,1.0/diag_mass,cn,NMAX,ExpCloverOp);
block = 1;
for(int i = 0; i < 6; i++){
for(int j = 0; j < 6; j++){
if (i == j){
diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j);
}
else if(i < j){
triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j);
}
}
}
pokeLocalSite(diagonal_exp_tmp, diagonalExp_v, lcoor);
pokeLocalSite(triangle_exp_tmp, triangleExp_v, lcoor);
});
}
Diagonal *= diag_mass;
Triangle *= diag_mass;
}
static void InvertClover(CloverField& InvClover,
const CloverDiagonalField& diagonal,
const CloverTriangleField& triangle,
CloverDiagonalField& diagonalInv,
CloverTriangleField& triangleInv,
bool fixedBoundaries) {
if (fixedBoundaries)
{
CompactHelpers::Invert(diagonal, triangle, diagonalInv, triangleInv);
}
else
{
CompactHelpers::ConvertLayout(InvClover, diagonalInv, triangleInv);
}
}
static GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu) {
assert(0);

View File

@ -225,7 +225,7 @@ public:
RealD csw_t;
RealD cF;
bool open_boundaries;
bool fixedBoundaries;
CloverDiagonalField Diagonal, DiagonalEven, DiagonalOdd;
CloverDiagonalField DiagonalInv, DiagonalInvEven, DiagonalInvOdd;

View File

@ -49,8 +49,6 @@ public:
virtual FermionField &tmp(void) = 0;
virtual void DirichletBlock(const Coordinate & _Block) { assert(0); };
GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know
GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); };

View File

@ -30,18 +30,6 @@ directory
NAMESPACE_BEGIN(Grid);
/*
Policy implementation for G-parity boundary conditions
Rather than treating the gauge field as a flavored field, the Grid implementation of G-parity treats the gauge field as a regular
field with complex conjugate boundary conditions. In order to ensure the second flavor interacts with the conjugate links and the first
with the regular links we overload the functionality of doubleStore, whose purpose is to store the gauge field and the barrel-shifted gauge field
to avoid communicating links when applying the Dirac operator, such that the double-stored field contains also a flavor index which maps to
either the link or the conjugate link. This flavored field is then used by multLink to apply the correct link to a spinor.
Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.
mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
*/
template <class S, class Representation = FundamentalRepresentation, class Options=CoeffReal>
class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Representation::Dimension> > {
public:
@ -125,7 +113,7 @@ public:
|| ((distance== 1)&&(icoor[direction]==1))
|| ((distance==-1)&&(icoor[direction]==0));
permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu] && mmu < Nd-1; //only if we are going around the world in a spatial direction
permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu]; //only if we are going around the world
//Apply the links
int f_upper = permute_lane ? 1 : 0;
@ -151,10 +139,10 @@ public:
assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code
assert((sl == 1) || (sl == 2));
//If this site is an global boundary site, perform the G-parity flavor twist
if ( mmu < Nd-1 && SE->_around_the_world && St.parameters.twists[mmu] ) {
if ( SE->_around_the_world && St.parameters.twists[mmu] ) {
if ( sl == 2 ) {
//Only do the twist for lanes on the edge of the physical node
ExtractBuffer<sobj> vals(Nsimd);
extract(chi,vals);
@ -209,19 +197,6 @@ public:
reg = memory;
}
//Poke 'poke_f0' onto flavor 0 and 'poke_f1' onto flavor 1 in direction mu of the doubled gauge field Uds
inline void pokeGparityDoubledGaugeField(DoubledGaugeField &Uds, const GaugeLinkField &poke_f0, const GaugeLinkField &poke_f1, const int mu){
autoView(poke_f0_v, poke_f0, CpuRead);
autoView(poke_f1_v, poke_f1, CpuRead);
autoView(Uds_v, Uds, CpuWrite);
thread_foreach(ss,poke_f0_v,{
Uds_v[ss](0)(mu) = poke_f0_v[ss]();
Uds_v[ss](1)(mu) = poke_f1_v[ss]();
});
}
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
{
conformable(Uds.Grid(),GaugeGrid);
@ -232,19 +207,14 @@ public:
GaugeLinkField Uconj(GaugeGrid);
Lattice<iScalar<vInteger> > coor(GaugeGrid);
//Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.
//mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
for(int mu=0;mu<Nd-1;mu++){
if( Params.twists[mu] ){
LatticeCoordinate(coor,mu);
}
for(int mu=0;mu<Nd;mu++){
LatticeCoordinate(coor,mu);
U = PeekIndex<LorentzIndex>(Umu,mu);
Uconj = conjugate(U);
// Implement the isospin rotation sign on the boundary between f=1 and f=0
// This phase could come from a simple bc 1,1,-1,1 ..
int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
if ( Params.twists[mu] ) {
@ -259,7 +229,7 @@ public:
thread_foreach(ss,U_v,{
Uds_v[ss](0)(mu) = U_v[ss]();
Uds_v[ss](1)(mu) = Uconj_v[ss]();
});
});
}
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
@ -290,38 +260,6 @@ public:
});
}
}
{ //periodic / antiperiodic temporal BCs
int mu = Nd-1;
int L = GaugeGrid->GlobalDimensions()[mu];
int Lmu = L - 1;
LatticeCoordinate(coor, mu);
U = PeekIndex<LorentzIndex>(Umu, mu); //Get t-directed links
GaugeLinkField *Upoke = &U;
if(Params.twists[mu]){ //antiperiodic
Utmp = where(coor == Lmu, -U, U);
Upoke = &Utmp;
}
Uconj = conjugate(*Upoke); //second flavor interacts with conjugate links
pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu);
//Get the barrel-shifted field
Utmp = adj(Cshift(U, mu, -1)); //is a forward shift!
Upoke = &Utmp;
if(Params.twists[mu]){
U = where(coor == 0, -Utmp, Utmp); //boundary phase
Upoke = &U;
}
Uconj = conjugate(*Upoke);
pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu + 4);
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) {
@ -360,48 +298,28 @@ public:
inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds){
assert(0);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
int Ls=Btilde.Grid()->_fdimensions[0];
{
GridBase *GaugeGrid = mat.Grid();
Lattice<iScalar<vInteger> > coor(GaugeGrid);
if( Params.twists[mu] ){
LatticeCoordinate(coor,mu);
}
autoView( mat_v , mat, AcceleratorWrite);
autoView( Btilde_v , Btilde, AcceleratorRead);
autoView( Atilde_v , Atilde, AcceleratorRead);
accelerator_for(sss,mat.Grid()->oSites(), FermionField::vector_type::Nsimd(),{
int sU=sss;
typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType;
ColorMatrixType sum;
zeroit(sum);
for(int s=0;s<Ls;s++){
int sF = s+Ls*sU;
for(int spn=0;spn<Ns;spn++){ //sum over spin
//Flavor 0
auto bb = coalescedRead(Btilde_v[sF](0)(spn) ); //color vector
auto aa = coalescedRead(Atilde_v[sF](0)(spn) );
sum = sum + outerProduct(bb,aa);
//Flavor 1
bb = coalescedRead(Btilde_v[sF](1)(spn) );
aa = coalescedRead(Atilde_v[sF](1)(spn) );
sum = sum + conjugate(outerProduct(bb,aa));
}
}
coalescedWrite(mat_v[sU](mu)(), sum);
});
}
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
int Ls = Btilde.Grid()->_fdimensions[0];
GaugeLinkField tmp(mat.Grid());
tmp = Zero();
{
autoView( tmp_v , tmp, CpuWrite);
autoView( Atilde_v , Atilde, CpuRead);
autoView( Btilde_v , Btilde, CpuRead);
thread_for(ss,tmp.Grid()->oSites(),{
for (int s = 0; s < Ls; s++) {
int sF = s + Ls * ss;
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde_v[sF], Atilde_v[sF]));
tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
}
});
}
PokeIndex<LorentzIndex>(mat, tmp, mu);
return;
}
};

View File

@ -117,19 +117,19 @@ public:
typedef decltype(coalescedRead(*in)) sobj;
typedef decltype(coalescedRead(*out0)) hsobj;
unsigned int Nsimd = vobj::Nsimd();
constexpr unsigned int Nsimd = vobj::Nsimd();
unsigned int mask = Nsimd >> (type + 1);
int lane = acceleratorSIMTlane(Nsimd);
int j0 = lane &(~mask); // inner coor zero
int j1 = lane |(mask) ; // inner coor one
const vobj *vp0 = &in[k];
const vobj *vp1 = &in[m];
const vobj *vp = (lane&mask) ? vp1:vp0;
auto sa = coalescedRead(*vp,j0);
auto sb = coalescedRead(*vp,j1);
const vobj *vp0 = &in[k]; // out0[j] = merge low bit of type from in[k] and in[m]
const vobj *vp1 = &in[m]; // out1[j] = merge hi bit of type from in[k] and in[m]
const vobj *vp = (lane&mask) ? vp1:vp0;// if my lane has high bit take vp1, low bit take vp0
auto sa = coalescedRead(*vp,j0); // lane to read for out 0, NB 50% read coalescing
auto sb = coalescedRead(*vp,j1); // lane to read for out 1
hsobj psa, psb;
projector::Proj(psa,sa,mu,dag);
projector::Proj(psb,sb,mu,dag);
projector::Proj(psa,sa,mu,dag); // spin project the result0
projector::Proj(psb,sb,mu,dag); // spin project the result1
coalescedWrite(out0[j],psa);
coalescedWrite(out1[j],psb);
#else
@ -297,7 +297,7 @@ public:
void ZeroCountersi(void) { }
void Reporti(int calls) { }
// Vector<int> surface_list;
std::vector<int> surface_list;
WilsonStencil(GridBase *grid,
int npoints,
@ -307,11 +307,10 @@ public:
: CartesianStencil<vobj,cobj,Parameters> (grid,npoints,checkerboard,directions,distances,p)
{
ZeroCountersi();
// surface_list.resize(0);
surface_list.resize(0);
this->same_node.resize(npoints);
};
/*
void BuildSurfaceList(int Ls,int vol4){
// find same node for SHM
@ -332,8 +331,7 @@ public:
}
}
}
*/
template < class compressor>
void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress)
{

View File

@ -75,10 +75,6 @@ public:
FermionField _tmp;
FermionField &tmp(void) { return _tmp; }
int Dirichlet;
Coordinate Block;
/********** Deprecate timers **********/
void Report(void);
void ZeroCounters(void);
double DhopCalls;
@ -177,10 +173,7 @@ public:
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
double _M5,const ImplParams &p= ImplParams());
virtual void DirichletBlock(const Coordinate & block)
{
}
// Constructors
/*
WilsonFermion5D(int simd,

View File

@ -66,17 +66,18 @@ CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
M5Dcalls++;
M5Dtime-=usecond();
uint64_t nloop = grid->oSites();
uint64_t nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t s = sss%Ls;
uint64_t ss= sss-s;
uint64_t ss= sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1, tmp2;
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5m(tmp1,psi(idx_u));
spProj5p(tmp2,psi(idx_l));
coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
for(int s=0;s<Ls;s++){
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5m(tmp1,psi(idx_u));
spProj5p(tmp2,psi(idx_l));
coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
}
});
M5Dtime+=usecond();
}
@ -107,17 +108,18 @@ CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
M5Dcalls++;
M5Dtime-=usecond();
uint64_t nloop = grid->oSites();
uint64_t nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t s = sss%Ls;
uint64_t ss= sss-s;
uint64_t ss=sss*Ls;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1,tmp2;
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5p(tmp1,psi(idx_u));
spProj5m(tmp2,psi(idx_l));
coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
for(int s=0;s<Ls;s++){
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5p(tmp1,psi(idx_u));
spProj5m(tmp2,psi(idx_l));
coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
}
});
M5Dtime+=usecond();
}

View File

@ -48,7 +48,7 @@ CompactWilsonCloverFermion<Impl, CloverHelpers>::CompactWilsonCloverFermion(Gaug
, csw_r(_csw_r)
, csw_t(_csw_t)
, cF(_cF)
, open_boundaries(impl_p.boundary_phases[Nd-1] == 0.0)
, fixedBoundaries(impl_p.boundary_phases[Nd-1] == 0.0)
, Diagonal(&Fgrid), Triangle(&Fgrid)
, DiagonalEven(&Hgrid), TriangleEven(&Hgrid)
, DiagonalOdd(&Hgrid), TriangleOdd(&Hgrid)
@ -67,7 +67,7 @@ CompactWilsonCloverFermion<Impl, CloverHelpers>::CompactWilsonCloverFermion(Gaug
csw_r /= clover_anisotropy.xi_0;
ImportGauge(_Umu);
if (open_boundaries) {
if (fixedBoundaries) {
this->BoundaryMaskEven.Checkerboard() = Even;
this->BoundaryMaskOdd.Checkerboard() = Odd;
CompactHelpers::SetupMasks(this->BoundaryMask, this->BoundaryMaskEven, this->BoundaryMaskOdd);
@ -77,31 +77,31 @@ CompactWilsonCloverFermion<Impl, CloverHelpers>::CompactWilsonCloverFermion(Gaug
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::Dhop(const FermionField& in, FermionField& out, int dag) {
WilsonBase::Dhop(in, out, dag);
if(open_boundaries) ApplyBoundaryMask(out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::DhopOE(const FermionField& in, FermionField& out, int dag) {
WilsonBase::DhopOE(in, out, dag);
if(open_boundaries) ApplyBoundaryMask(out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::DhopEO(const FermionField& in, FermionField& out, int dag) {
WilsonBase::DhopEO(in, out, dag);
if(open_boundaries) ApplyBoundaryMask(out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::DhopDir(const FermionField& in, FermionField& out, int dir, int disp) {
WilsonBase::DhopDir(in, out, dir, disp);
if(this->open_boundaries) ApplyBoundaryMask(out);
if(this->fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::DhopDirAll(const FermionField& in, std::vector<FermionField>& out) {
WilsonBase::DhopDirAll(in, out);
if(this->open_boundaries) {
if(this->fixedBoundaries) {
for(auto& o : out) ApplyBoundaryMask(o);
}
}
@ -112,7 +112,7 @@ void CompactWilsonCloverFermion<Impl, CloverHelpers>::M(const FermionField& in,
WilsonBase::Dhop(in, out, DaggerNo); // call base to save applying bc
Mooee(in, Tmp);
axpy(out, 1.0, out, Tmp);
if(open_boundaries) ApplyBoundaryMask(out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
@ -121,19 +121,19 @@ void CompactWilsonCloverFermion<Impl, CloverHelpers>::Mdag(const FermionField& i
WilsonBase::Dhop(in, out, DaggerYes); // call base to save applying bc
MooeeDag(in, Tmp);
axpy(out, 1.0, out, Tmp);
if(open_boundaries) ApplyBoundaryMask(out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::Meooe(const FermionField& in, FermionField& out) {
WilsonBase::Meooe(in, out);
if(open_boundaries) ApplyBoundaryMask(out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::MeooeDag(const FermionField& in, FermionField& out) {
WilsonBase::MeooeDag(in, out);
if(open_boundaries) ApplyBoundaryMask(out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
@ -147,7 +147,7 @@ void CompactWilsonCloverFermion<Impl, CloverHelpers>::Mooee(const FermionField&
} else {
MooeeInternal(in, out, Diagonal, Triangle);
}
if(open_boundaries) ApplyBoundaryMask(out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
@ -166,7 +166,7 @@ void CompactWilsonCloverFermion<Impl, CloverHelpers>::MooeeInv(const FermionFiel
} else {
MooeeInternal(in, out, DiagonalInv, TriangleInv);
}
if(open_boundaries) ApplyBoundaryMask(out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
@ -186,7 +186,7 @@ void CompactWilsonCloverFermion<Impl, CloverHelpers>::MdirAll(const FermionField
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl, CloverHelpers>::MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) {
assert(!open_boundaries); // TODO check for changes required for open bc
assert(!fixedBoundaries); // TODO check for changes required for open bc
// NOTE: code copied from original clover term
conformable(X.Grid(), Y.Grid());
@ -305,6 +305,7 @@ void CompactWilsonCloverFermion<Impl, CloverHelpers>::ImportGauge(const GaugeFie
GridBase* grid = _Umu.Grid();
typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid);
CloverField TmpOriginal(grid);
CloverField TmpInverse(grid);
// Compute the field strength terms mu>nu
double t2 = usecond();
@ -324,24 +325,27 @@ void CompactWilsonCloverFermion<Impl, CloverHelpers>::ImportGauge(const GaugeFie
TmpOriginal += Helpers::fillCloverXT(Ex) * csw_t;
TmpOriginal += Helpers::fillCloverYT(Ey) * csw_t;
TmpOriginal += Helpers::fillCloverZT(Ez) * csw_t;
// Handle mass term based on clover policy
CloverHelpers::MassTerm(TmpOriginal, this->diag_mass);
// Convert the data layout of the clover term
// Instantiate the clover term
// - In case of the standard clover the mass term is added
// - In case of the exponential clover the clover term is exponentiated
double t4 = usecond();
CloverHelpers::InstantiateClover(TmpOriginal, TmpInverse, csw_t, this->diag_mass);
// Convert the data layout of the clover term
double t5 = usecond();
CompactHelpers::ConvertLayout(TmpOriginal, Diagonal, Triangle);
// Exponentiate the clover (nothing happens in case of the standard clover)
double t5 = usecond();
CloverHelpers::Exponentiate_Clover(Diagonal, Triangle, csw_t, this->diag_mass);
// Possible modify the boundary values
// Modify the clover term at the temporal boundaries in case of open boundary conditions
double t6 = usecond();
if(open_boundaries) CompactHelpers::ModifyBoundaries(Diagonal, Triangle, csw_t, cF, this->diag_mass);
if(fixedBoundaries) CompactHelpers::ModifyBoundaries(Diagonal, Triangle, csw_t, cF, this->diag_mass);
// Invert the Clover term (explicit inversion needed for the improvement in case of open boundary conditions)
// Invert the Clover term
// In case of the exponential clover with (anti-)periodic boundary conditions exp(-Clover) saved
// in TmpInverse can be used. In all other cases the clover term has to be explictly inverted.
// TODO: For now this inversion is explictly done on the CPU
double t7 = usecond();
CompactHelpers::Invert(Diagonal, Triangle, DiagonalInv, TriangleInv);
CloverHelpers::InvertClover(TmpInverse, Diagonal, Triangle, DiagonalInv, TriangleInv, fixedBoundaries);
// Fill the remaining clover fields
double t8 = usecond();
@ -362,10 +366,10 @@ void CompactWilsonCloverFermion<Impl, CloverHelpers>::ImportGauge(const GaugeFie
std::cout << GridLogDebug << "allocations = " << (t2 - t1) / 1e6 << std::endl;
std::cout << GridLogDebug << "field strength = " << (t3 - t2) / 1e6 << std::endl;
std::cout << GridLogDebug << "fill clover = " << (t4 - t3) / 1e6 << std::endl;
std::cout << GridLogDebug << "convert = " << (t5 - t4) / 1e6 << std::endl;
std::cout << GridLogDebug << "exponentiation = " << (t6 - t5) / 1e6 << std::endl;
std::cout << GridLogDebug << "boundaries = " << (t7 - t6) / 1e6 << std::endl;
std::cout << GridLogDebug << "inversions = " << (t8 - t7) / 1e6 << std::endl;
std::cout << GridLogDebug << "instantiate clover = " << (t5 - t4) / 1e6 << std::endl;
std::cout << GridLogDebug << "convert layout = " << (t6 - t5) / 1e6 << std::endl;
std::cout << GridLogDebug << "modify boundaries = " << (t7 - t6) / 1e6 << std::endl;
std::cout << GridLogDebug << "invert clover = " << (t8 - t7) / 1e6 << std::endl;
std::cout << GridLogDebug << "pick cbs = " << (t9 - t8) / 1e6 << std::endl;
std::cout << GridLogDebug << "total = " << (t9 - t0) / 1e6 << std::endl;
}

View File

@ -60,8 +60,7 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
UmuOdd (_FourDimRedBlackGrid),
Lebesgue(_FourDimGrid),
LebesgueEvenOdd(_FourDimRedBlackGrid),
_tmp(&FiveDimRedBlackGrid),
Dirichlet(0)
_tmp(&FiveDimRedBlackGrid)
{
// some assertions
assert(FiveDimGrid._ndimension==5);
@ -92,19 +91,6 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]);
}
if ( p.dirichlet.size() == Nd+1) {
Coordinate block = p.dirichlet;
if ( block[0] || block[1] || block[2] || block[3] || block[4] ){
Dirichlet = 1;
Block = block;
}
} else {
Coordinate block(Nd+1,0);
Block = block;
}
ZeroCounters();
if (Impl::LsVectorised) {
int nsimd = Simd::Nsimd();
@ -232,14 +218,6 @@ void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
{
GaugeField HUmu(_Umu.Grid());
HUmu = _Umu*(-0.5);
if ( Dirichlet ) {
std::cout << GridLogMessage << " Dirichlet BCs 5d " <<Block<<std::endl;
Coordinate GaugeBlock(Nd);
for(int d=0;d<Nd;d++) GaugeBlock[d] = Block[d+1];
std::cout << GridLogMessage << " Dirichlet BCs 4d " <<GaugeBlock<<std::endl;
DirichletFilter<GaugeField> Filter(GaugeBlock);
Filter.applyFilter(HUmu);
}
Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
pickCheckerboard(Even,UmuEven,Umu);
pickCheckerboard(Odd ,UmuOdd,Umu);

View File

@ -440,17 +440,6 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
#define KERNEL_CALL_EXT(A) \
const uint64_t NN = Nsite*Ls; \
const uint64_t sz = st.surface_list.size(); \
auto ptr = &st.surface_list[0]; \
accelerator_forNB( ss, sz, Simd::Nsimd(), { \
int sF = ptr[ss]; \
int sU = ss/Ls; \
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
}); \
accelerator_barrier();
#define ASM_CALL(A) \
thread_for( ss, Nsite, { \
int sU = ss; \
@ -509,6 +498,7 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
#ifndef GRID_CUDA
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDag); return;}
#endif
acceleratorFenceComputeStream();
} else if( interior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagInt); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagInt); return;}
@ -516,11 +506,13 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;}
#endif
} else if( exterior ) {
acceleratorFenceComputeStream();
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagExt); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt); return;}
#ifndef GRID_CUDA
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagExt); return;}
#endif
acceleratorFenceComputeStream();
}
assert(0 && " Kernel optimisation case not covered ");
}

View File

@ -1,102 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/hmc/integrators/DirichletFilter.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
//--------------------------------------------------------------------
#pragma once
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////
// DDHMC filter with sub-block size B[mu]
////////////////////////////////////////////////////
template<typename GaugeField>
struct DDHMCFilter: public MomentumFilterBase<GaugeField>
{
Coordinate Block;
int Width;
DDHMCFilter(const Coordinate &_Block,int _Width=2): Block(_Block) { Width=_Width; }
void applyFilter(GaugeField &U) const override
{
GridBase *grid = U.Grid();
Coordinate Global=grid->GlobalDimensions();
GaugeField zzz(grid); zzz = Zero();
LatticeInteger coor(grid);
auto zzz_mu = PeekIndex<LorentzIndex>(zzz,0);
////////////////////////////////////////////////////
// Zero BDY layers
////////////////////////////////////////////////////
std::cout<<GridLogMessage<<" DDHMC Force Filter Block "<<Block<<" width " <<Width<<std::endl;
for(int mu=0;mu<Nd;mu++) {
Integer B1 = Block[mu];
if ( B1 && (B1 <= Global[mu]) ) {
LatticeCoordinate(coor,mu);
////////////////////////////////
// OmegaBar - zero all links contained in slice B-1,0 and
// mu links connecting to Omega
////////////////////////////////
if ( Width==1) {
U = where(mod(coor,B1)==Integer(B1-1),zzz,U);
U = where(mod(coor,B1)==Integer(0) ,zzz,U);
auto U_mu = PeekIndex<LorentzIndex>(U,mu);
U_mu = where(mod(coor,B1)==Integer(B1-2),zzz_mu,U_mu);
PokeIndex<LorentzIndex>(U, U_mu, mu);
}
if ( Width==2) {
U = where(mod(coor,B1)==Integer(B1-2),zzz,U);
U = where(mod(coor,B1)==Integer(B1-1),zzz,U);
U = where(mod(coor,B1)==Integer(0) ,zzz,U);
U = where(mod(coor,B1)==Integer(1) ,zzz,U);
auto U_mu = PeekIndex<LorentzIndex>(U,mu);
U_mu = where(mod(coor,B1)==Integer(B1-3),zzz_mu,U_mu);
PokeIndex<LorentzIndex>(U, U_mu, mu);
}
if ( Width==3) {
U = where(mod(coor,B1)==Integer(B1-3),zzz,U);
U = where(mod(coor,B1)==Integer(B1-2),zzz,U);
U = where(mod(coor,B1)==Integer(B1-1),zzz,U);
U = where(mod(coor,B1)==Integer(0) ,zzz,U);
U = where(mod(coor,B1)==Integer(1) ,zzz,U);
U = where(mod(coor,B1)==Integer(2) ,zzz,U);
auto U_mu = PeekIndex<LorentzIndex>(U,mu);
U_mu = where(mod(coor,B1)==Integer(B1-4),zzz_mu,U_mu);
PokeIndex<LorentzIndex>(U, U_mu, mu);
}
}
}
}
};
NAMESPACE_END(Grid);

View File

@ -1,71 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/hmc/integrators/DirichletFilter.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
//--------------------------------------------------------------------
#pragma once
NAMESPACE_BEGIN(Grid);
template<typename MomentaField>
struct DirichletFilter: public MomentumFilterBase<MomentaField>
{
typedef typename MomentaField::vector_type vector_type; //SIMD-vectorized complex type
typedef typename MomentaField::scalar_type scalar_type; //scalar complex type
typedef iScalar<iScalar<iScalar<vector_type> > > ScalarType; //complex phase for each site
Coordinate Block;
DirichletFilter(const Coordinate &_Block): Block(_Block){}
void applyFilter(MomentaField &P) const override
{
GridBase *grid = P.Grid();
typedef decltype(PeekIndex<LorentzIndex>(P, 0)) LatCM;
////////////////////////////////////////////////////
// Zero strictly links crossing between domains
////////////////////////////////////////////////////
LatticeInteger coor(grid);
LatCM zz(grid); zz = Zero();
for(int mu=0;mu<Nd;mu++) {
if ( (Block[mu]) && (Block[mu] <= grid->GlobalDimensions()[mu] ) ) {
// If costly could provide Grid earlier and precompute masks
std::cout << GridLogMessage << " Dirichlet in mu="<<mu<<std::endl;
LatticeCoordinate(coor,mu);
auto P_mu = PeekIndex<LorentzIndex>(P, mu);
P_mu = where(mod(coor,Block[mu])==Integer(Block[mu]-1),zz,P_mu);
PokeIndex<LorentzIndex>(P, P_mu, mu);
}
}
}
};
NAMESPACE_END(Grid);

View File

@ -69,11 +69,6 @@ public:
return PeriodicBC::ShiftStaple(Link,mu);
}
//Same as Cshift for periodic BCs
static inline GaugeLinkField CshiftLink(const GaugeLinkField &Link, int mu, int shift){
return PeriodicBC::CshiftLink(Link,mu,shift);
}
static inline bool isPeriodicGaugeField(void) { return true; }
};
@ -115,11 +110,6 @@ public:
return PeriodicBC::CovShiftBackward(Link, mu, field);
}
//If mu is a conjugate BC direction
//Out(x) = U^dag_\mu(x-mu) | x_\mu != 0
// = U^T_\mu(L-1) | x_\mu == 0
//else
//Out(x) = U^dag_\mu(x-mu mod L)
static inline GaugeLinkField
CovShiftIdentityBackward(const GaugeLinkField &Link, int mu)
{
@ -139,13 +129,6 @@ public:
return PeriodicBC::CovShiftIdentityForward(Link,mu);
}
//If mu is a conjugate BC direction
//Out(x) = S_\mu(x+mu) | x_\mu != L-1
// = S*_\mu(x+mu) | x_\mu == L-1
//else
//Out(x) = S_\mu(x+mu mod L)
//Note: While this is used for Staples it is also applicable for shifting gauge links or gauge transformation matrices
static inline GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu)
{
assert(_conjDirs.size() == Nd);
@ -155,27 +138,6 @@ public:
return PeriodicBC::ShiftStaple(Link,mu);
}
//Boundary-aware C-shift of gauge links / gauge transformation matrices
//For conjugate BC direction
//shift = 1
//Out(x) = U_\mu(x+\hat\mu) | x_\mu != L-1
// = U*_\mu(0) | x_\mu == L-1
//shift = -1
//Out(x) = U_\mu(x-mu) | x_\mu != 0
// = U*_\mu(L-1) | x_\mu == 0
//else
//shift = 1
//Out(x) = U_\mu(x+\hat\mu mod L)
//shift = -1
//Out(x) = U_\mu(x-\hat\mu mod L)
static inline GaugeLinkField CshiftLink(const GaugeLinkField &Link, int mu, int shift){
assert(_conjDirs.size() == Nd);
if(_conjDirs[mu])
return ConjugateBC::CshiftLink(Link,mu,shift);
else
return PeriodicBC::CshiftLink(Link,mu,shift);
}
static inline void setDirections(std::vector<int> &conjDirs) { _conjDirs=conjDirs; }
static inline std::vector<int> getDirections(void) { return _conjDirs; }
static inline bool isPeriodicGaugeField(void) { return false; }

View File

@ -49,7 +49,7 @@ NAMESPACE_BEGIN(Grid);
typedef Lattice<SiteLink> LinkField;
typedef Lattice<SiteField> Field;
typedef Field ComplexField;
typedef LinkField ComplexField;
};
typedef QedGImpl<vComplex> QedGImplR;

View File

@ -13,31 +13,6 @@ NAMESPACE_BEGIN(Grid);
std::cout << GridLogMessage << "Pseudofermion action lamda_max "<<lambda_max<<"( bound "<<hi<<")"<<std::endl;
assert( (lambda_max < hi) && " High Bounds Check on operator failed" );
}
template<class Field> void ChebyBoundsCheck(LinearOperatorBase<Field> &HermOp,
Field &GaussNoise,
RealD lo,RealD hi)
{
int orderfilter = 1000;
Chebyshev<Field> Cheb(lo,hi,orderfilter);
GridBase *FermionGrid = GaussNoise.Grid();
Field X(FermionGrid);
Field Z(FermionGrid);
X=GaussNoise;
RealD Nx = norm2(X);
Cheb(HermOp,X,Z);
RealD Nz = norm2(Z);
std::cout << "************************* "<<std::endl;
std::cout << " noise = "<<Nx<<std::endl;
std::cout << " Cheb x noise = "<<Nz<<std::endl;
std::cout << " Ratio = "<<Nz/Nx<<std::endl;
std::cout << "************************* "<<std::endl;
assert( ((Nz/Nx)<1.0) && " ChebyBoundsCheck ");
}
template<class Field> void InverseSqrtBoundsCheck(int MaxIter,double tol,
LinearOperatorBase<Field> &HermOp,
@ -65,65 +40,13 @@ NAMESPACE_BEGIN(Grid);
X=X-Y;
RealD Nd = norm2(X);
std::cout << "************************* "<<std::endl;
std::cout << " | noise |^2 = "<<Nx<<std::endl;
std::cout << " | (MdagM^-1/2)^2 noise |^2 = "<<Nz<<std::endl;
std::cout << " | MdagM (MdagM^-1/2)^2 noise |^2 = "<<Ny<<std::endl;
std::cout << " | noise - MdagM (MdagM^-1/2)^2 noise |^2 = "<<Nd<<std::endl;
std::cout << " | noise - MdagM (MdagM^-1/2)^2 noise|/|noise| = " << std::sqrt(Nd/Nx) << std::endl;
std::cout << " noise = "<<Nx<<std::endl;
std::cout << " (MdagM^-1/2)^2 noise = "<<Nz<<std::endl;
std::cout << " MdagM (MdagM^-1/2)^2 noise = "<<Ny<<std::endl;
std::cout << " noise - MdagM (MdagM^-1/2)^2 noise = "<<Nd<<std::endl;
std::cout << "************************* "<<std::endl;
assert( (std::sqrt(Nd/Nx)<tol) && " InverseSqrtBoundsCheck ");
}
/* For a HermOp = M^dag M, check the approximation of HermOp^{-1/inv_pow}
by computing |X - HermOp * [ Hermop^{-1/inv_pow} ]^{inv_pow} X| < tol
for noise X (aka GaussNoise).
ApproxNegPow should be the rational approximation for X^{-1/inv_pow}
*/
template<class Field> void InversePowerBoundsCheck(int inv_pow,
int MaxIter,double tol,
LinearOperatorBase<Field> &HermOp,
Field &GaussNoise,
MultiShiftFunction &ApproxNegPow)
{
GridBase *FermionGrid = GaussNoise.Grid();
Field X(FermionGrid);
Field Y(FermionGrid);
Field Z(FermionGrid);
Field tmp1(FermionGrid), tmp2(FermionGrid);
X=GaussNoise;
RealD Nx = norm2(X);
ConjugateGradientMultiShift<Field> msCG(MaxIter,ApproxNegPow);
tmp1 = X;
Field* in = &tmp1;
Field* out = &tmp2;
for(int i=0;i<inv_pow;i++){ //apply [ Hermop^{-1/inv_pow} ]^{inv_pow} X = HermOp^{-1} X
msCG(HermOp, *in, *out); //backwards conventions!
if(i!=inv_pow-1) std::swap(in, out);
}
Z = *out;
RealD Nz = norm2(Z);
HermOp.HermOp(Z,Y);
RealD Ny = norm2(Y);
X=X-Y;
RealD Nd = norm2(X);
std::cout << "************************* "<<std::endl;
std::cout << " | noise |^2 = "<<Nx<<std::endl;
std::cout << " | (MdagM^-1/" << inv_pow << ")^" << inv_pow << " noise |^2 = "<<Nz<<std::endl;
std::cout << " | MdagM (MdagM^-1/" << inv_pow << ")^" << inv_pow << " noise |^2 = "<<Ny<<std::endl;
std::cout << " | noise - MdagM (MdagM^-1/" << inv_pow << ")^" << inv_pow << " noise |^2 = "<<Nd<<std::endl;
std::cout << " | noise - MdagM (MdagM^-1/" << inv_pow << ")^" << inv_pow << " noise |/| noise | = "<<std::sqrt(Nd/Nx)<<std::endl;
std::cout << "************************* "<<std::endl;
assert( (std::sqrt(Nd/Nx)<tol) && " InversePowerBoundsCheck ");
}
NAMESPACE_END(Grid);

View File

@ -1,163 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/pseudofermion/DomainDecomposedTwoFlavourBoundaryBoson.h
Copyright (C) 2021
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
///////////////////////////////////////
// Two flavour ratio
///////////////////////////////////////
template<class ImplD,class ImplF>
class DomainDecomposedBoundaryTwoFlavourBosonPseudoFermion : public Action<typename ImplD::GaugeField> {
public:
INHERIT_IMPL_TYPES(ImplD);
private:
SchurFactoredFermionOperator<ImplD,ImplF> & NumOp;// the basic operator
RealD InnerStoppingCondition;
RealD ActionStoppingCondition;
RealD DerivativeStoppingCondition;
FermionField Phi; // the pseudo fermion field for this trajectory
public:
DomainDecomposedBoundaryTwoFlavourBosonPseudoFermion(SchurFactoredFermionOperator<ImplD,ImplF> &_NumOp,RealD _DerivativeTol, RealD _ActionTol, RealD _InnerTol=1.0e-6)
: NumOp(_NumOp),
DerivativeStoppingCondition(_DerivativeTol),
ActionStoppingCondition(_ActionTol),
InnerStoppingCondition(_InnerTol),
Phi(_NumOp.FermionGrid()) {};
virtual std::string action_name(){return "DomainDecomposedBoundaryTwoFlavourBosonPseudoFermion";}
virtual std::string LogParameters(){
std::stringstream sstream;
return sstream.str();
}
virtual void refresh(const GaugeField &U, GridSerialRNG& sRNG, GridParallelRNG& pRNG)
{
// P(phi) = e^{- phi^dag P^dag P phi}
//
// NumOp == P
//
// Take phi = P^{-1} eta ; eta = P Phi
//
// P(eta) = e^{- eta^dag eta}
//
// e^{x^2/2 sig^2} => sig^2 = 0.5.
//
// So eta should be of width sig = 1/sqrt(2) and must multiply by 0.707....
//
RealD scale = std::sqrt(0.5);
NumOp.tolinner=InnerStoppingCondition;
NumOp.tol=ActionStoppingCondition;
NumOp.ImportGauge(U);
FermionField eta(NumOp.FermionGrid());
gaussian(pRNG,eta); eta=eta*scale;
NumOp.ProjectBoundaryBar(eta);
//DumpSliceNorm("eta",eta);
NumOp.RInv(eta,Phi);
//DumpSliceNorm("Phi",Phi);
};
//////////////////////////////////////////////////////
// S = phi^dag Pdag P phi
//////////////////////////////////////////////////////
virtual RealD S(const GaugeField &U) {
NumOp.tolinner=InnerStoppingCondition;
NumOp.tol=ActionStoppingCondition;
NumOp.ImportGauge(U);
FermionField Y(NumOp.FermionGrid());
NumOp.R(Phi,Y);
RealD action = norm2(Y);
return action;
};
virtual void deriv(const GaugeField &U,GaugeField & dSdU)
{
NumOp.tolinner=InnerStoppingCondition;
NumOp.tol=DerivativeStoppingCondition;
NumOp.ImportGauge(U);
GridBase *fgrid = NumOp.FermionGrid();
GridBase *ugrid = NumOp.GaugeGrid();
FermionField X(fgrid);
FermionField Y(fgrid);
FermionField tmp(fgrid);
GaugeField force(ugrid);
FermionField DobiDdbPhi(fgrid); // Vector A in my notes
FermionField DoiDdDobiDdbPhi(fgrid); // Vector B in my notes
FermionField DoidP_Phi(fgrid); // Vector E in my notes
FermionField DobidDddDoidP_Phi(fgrid); // Vector F in my notes
FermionField P_Phi(fgrid);
// P term
NumOp.dBoundaryBar(Phi,tmp);
NumOp.dOmegaBarInv(tmp,DobiDdbPhi); // Vector A
NumOp.dBoundary(DobiDdbPhi,tmp);
NumOp.dOmegaInv(tmp,DoiDdDobiDdbPhi); // Vector B
P_Phi = Phi - DoiDdDobiDdbPhi;
NumOp.ProjectBoundaryBar(P_Phi);
// P^dag P term
NumOp.dOmegaDagInv(P_Phi,DoidP_Phi); // Vector E
NumOp.dBoundaryDag(DoidP_Phi,tmp);
NumOp.dOmegaBarDagInv(tmp,DobidDddDoidP_Phi); // Vector F
NumOp.dBoundaryBarDag(DobidDddDoidP_Phi,tmp);
X = DobiDdbPhi;
Y = DobidDddDoidP_Phi;
NumOp.DirichletFermOpD.MDeriv(force,Y,X,DaggerNo); dSdU=force;
NumOp.DirichletFermOpD.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU+force;
X = DoiDdDobiDdbPhi;
Y = DoidP_Phi;
NumOp.DirichletFermOpD.MDeriv(force,Y,X,DaggerNo); dSdU=dSdU+force;
NumOp.DirichletFermOpD.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU+force;
dSdU *= -1.0;
};
};
NAMESPACE_END(Grid);

View File

@ -1,158 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/pseudofermion/DomainDecomposedTwoFlavourBoundary.h
Copyright (C) 2021
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
///////////////////////////////////////
// Two flavour ratio
///////////////////////////////////////
template<class ImplD,class ImplF>
class DomainDecomposedBoundaryTwoFlavourPseudoFermion : public Action<typename ImplD::GaugeField> {
public:
INHERIT_IMPL_TYPES(ImplD);
private:
SchurFactoredFermionOperator<ImplD,ImplF> & DenOp;// the basic operator
RealD ActionStoppingCondition;
RealD DerivativeStoppingCondition;
RealD InnerStoppingCondition;
FermionField Phi; // the pseudo fermion field for this trajectory
RealD refresh_action;
public:
DomainDecomposedBoundaryTwoFlavourPseudoFermion(SchurFactoredFermionOperator<ImplD,ImplF> &_DenOp,RealD _DerivativeTol, RealD _ActionTol, RealD _InnerTol = 1.0e-6 )
: DenOp(_DenOp),
DerivativeStoppingCondition(_DerivativeTol),
ActionStoppingCondition(_ActionTol),
InnerStoppingCondition(_InnerTol),
Phi(_DenOp.FermionGrid()) {};
virtual std::string action_name(){return "DomainDecomposedBoundaryTwoFlavourPseudoFermion";}
virtual std::string LogParameters(){
std::stringstream sstream;
return sstream.str();
}
virtual void refresh(const GaugeField &U, GridSerialRNG& sRNG, GridParallelRNG& pRNG)
{
// P(phi) = e^{- phi^dag Rdag^-1 R^-1 phi}
//
// DenOp == R
//
// Take phi = R eta ; eta = R^-1 Phi
//
// P(eta) = e^{- eta^dag eta}
//
// e^{x^2/2 sig^2} => sig^2 = 0.5.
//
// So eta should be of width sig = 1/sqrt(2) and must multiply by 0.707....
//
RealD scale = std::sqrt(0.5);
DenOp.tolinner=InnerStoppingCondition;
DenOp.tol =ActionStoppingCondition;
DenOp.ImportGauge(U);
FermionField eta(DenOp.FermionGrid());
gaussian(pRNG,eta); eta=eta*scale;
DenOp.ProjectBoundaryBar(eta);
DenOp.R(eta,Phi);
//DumpSliceNorm("Phi",Phi);
refresh_action = norm2(eta);
};
//////////////////////////////////////////////////////
// S = phi^dag Rdag^-1 R^-1 phi
//////////////////////////////////////////////////////
virtual RealD S(const GaugeField &U) {
DenOp.tolinner=InnerStoppingCondition;
DenOp.tol=ActionStoppingCondition;
DenOp.ImportGauge(U);
FermionField X(DenOp.FermionGrid());
DenOp.RInv(Phi,X);
RealD action = norm2(X);
return action;
};
virtual void deriv(const GaugeField &U,GaugeField & dSdU)
{
DenOp.tolinner=InnerStoppingCondition;
DenOp.tol=DerivativeStoppingCondition;
DenOp.ImportGauge(U);
GridBase *fgrid = DenOp.FermionGrid();
GridBase *ugrid = DenOp.GaugeGrid();
FermionField X(fgrid);
FermionField Y(fgrid);
FermionField tmp(fgrid);
GaugeField force(ugrid);
FermionField DiDdb_Phi(fgrid); // Vector C in my notes
FermionField DidRinv_Phi(fgrid); // Vector D in my notes
FermionField Rinv_Phi(fgrid);
// FermionField RinvDagRinv_Phi(fgrid);
// FermionField DdbdDidRinv_Phi(fgrid);
// R^-1 term
DenOp.dBoundaryBar(Phi,tmp);
DenOp.Dinverse(tmp,DiDdb_Phi); // Vector C
Rinv_Phi = Phi - DiDdb_Phi;
DenOp.ProjectBoundaryBar(Rinv_Phi);
// R^-dagger R^-1 term
DenOp.DinverseDag(Rinv_Phi,DidRinv_Phi); // Vector D
/*
DenOp.dBoundaryBarDag(DidRinv_Phi,DdbdDidRinv_Phi);
RinvDagRinv_Phi = Rinv_Phi - DdbdDidRinv_Phi;
DenOp.ProjectBoundaryBar(RinvDagRinv_Phi);
*/
X = DiDdb_Phi;
Y = DidRinv_Phi;
DenOp.PeriodicFermOpD.MDeriv(force,Y,X,DaggerNo); dSdU=force;
DenOp.PeriodicFermOpD.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU+force;
DumpSliceNorm("force",dSdU);
dSdU *= -1.0;
};
};
NAMESPACE_END(Grid);

View File

@ -1,237 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/pseudofermion/DomainDecomposedTwoFlavourBoundary.h
Copyright (C) 2021
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
///////////////////////////////////////
// Two flavour ratio
///////////////////////////////////////
template<class ImplD,class ImplF>
class DomainDecomposedBoundaryTwoFlavourRatioPseudoFermion : public Action<typename ImplD::GaugeField> {
public:
INHERIT_IMPL_TYPES(ImplD);
private:
SchurFactoredFermionOperator<ImplD,ImplF> & NumOp;// the basic operator
SchurFactoredFermionOperator<ImplD,ImplF> & DenOp;// the basic operator
RealD InnerStoppingCondition;
RealD ActionStoppingCondition;
RealD DerivativeStoppingCondition;
FermionField Phi; // the pseudo fermion field for this trajectory
public:
DomainDecomposedBoundaryTwoFlavourRatioPseudoFermion(SchurFactoredFermionOperator<ImplD,ImplF> &_NumOp,
SchurFactoredFermionOperator<ImplD,ImplF> &_DenOp,
RealD _DerivativeTol, RealD _ActionTol, RealD _InnerTol=1.0e-6)
: NumOp(_NumOp), DenOp(_DenOp),
Phi(_NumOp.PeriodicFermOpD.FermionGrid()),
InnerStoppingCondition(_InnerTol),
DerivativeStoppingCondition(_DerivativeTol),
ActionStoppingCondition(_ActionTol)
{};
virtual std::string action_name(){return "DomainDecomposedBoundaryTwoFlavourRatioPseudoFermion";}
virtual std::string LogParameters(){
std::stringstream sstream;
return sstream.str();
}
virtual void refresh(const GaugeField &U, GridSerialRNG& sRNG, GridParallelRNG& pRNG)
{
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
FermionField eta(NumOp.PeriodicFermOpD.FermionGrid());
FermionField tmp(NumOp.PeriodicFermOpD.FermionGrid());
// P(phi) = e^{- phi^dag P^dag Rdag^-1 R^-1 P phi}
//
// NumOp == P
// DenOp == R
//
// Take phi = P^{-1} R eta ; eta = R^-1 P Phi
//
// P(eta) = e^{- eta^dag eta}
//
// e^{x^2/2 sig^2} => sig^2 = 0.5.
//
// So eta should be of width sig = 1/sqrt(2) and must multiply by 0.707....
//
RealD scale = std::sqrt(0.5);
gaussian(pRNG,eta); eta=eta*scale;
NumOp.ProjectBoundaryBar(eta);
NumOp.tolinner=InnerStoppingCondition;
DenOp.tolinner=InnerStoppingCondition;
DenOp.tol = ActionStoppingCondition;
NumOp.tol = ActionStoppingCondition;
DenOp.R(eta,tmp);
NumOp.RInv(tmp,Phi);
DumpSliceNorm("Phi",Phi);
};
//////////////////////////////////////////////////////
// S = phi^dag Pdag Rdag^-1 R^-1 P phi
//////////////////////////////////////////////////////
virtual RealD S(const GaugeField &U) {
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
FermionField X(NumOp.PeriodicFermOpD.FermionGrid());
FermionField Y(NumOp.PeriodicFermOpD.FermionGrid());
NumOp.tolinner=InnerStoppingCondition;
DenOp.tolinner=InnerStoppingCondition;
DenOp.tol = ActionStoppingCondition;
NumOp.tol = ActionStoppingCondition;
NumOp.R(Phi,Y);
DenOp.RInv(Y,X);
RealD action = norm2(X);
// std::cout << " DD boundary action is " <<action<<std::endl;
return action;
};
virtual void deriv(const GaugeField &U,GaugeField & dSdU)
{
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
GridBase *fgrid = NumOp.PeriodicFermOpD.FermionGrid();
GridBase *ugrid = NumOp.PeriodicFermOpD.GaugeGrid();
FermionField X(fgrid);
FermionField Y(fgrid);
FermionField tmp(fgrid);
GaugeField force(ugrid);
FermionField DobiDdbPhi(fgrid); // Vector A in my notes
FermionField DoiDdDobiDdbPhi(fgrid); // Vector B in my notes
FermionField DiDdbP_Phi(fgrid); // Vector C in my notes
FermionField DidRinvP_Phi(fgrid); // Vector D in my notes
FermionField DdbdDidRinvP_Phi(fgrid);
FermionField DoidRinvDagRinvP_Phi(fgrid); // Vector E in my notes
FermionField DobidDddDoidRinvDagRinvP_Phi(fgrid); // Vector F in my notes
FermionField P_Phi(fgrid);
FermionField RinvP_Phi(fgrid);
FermionField RinvDagRinvP_Phi(fgrid);
FermionField PdagRinvDagRinvP_Phi(fgrid);
// RealD action = S(U);
NumOp.tolinner=InnerStoppingCondition;
DenOp.tolinner=InnerStoppingCondition;
DenOp.tol = DerivativeStoppingCondition;
NumOp.tol = DerivativeStoppingCondition;
// P term
NumOp.dBoundaryBar(Phi,tmp);
NumOp.dOmegaBarInv(tmp,DobiDdbPhi); // Vector A
NumOp.dBoundary(DobiDdbPhi,tmp);
NumOp.dOmegaInv(tmp,DoiDdDobiDdbPhi); // Vector B
P_Phi = Phi - DoiDdDobiDdbPhi;
NumOp.ProjectBoundaryBar(P_Phi);
// R^-1 P term
DenOp.dBoundaryBar(P_Phi,tmp);
DenOp.Dinverse(tmp,DiDdbP_Phi); // Vector C
RinvP_Phi = P_Phi - DiDdbP_Phi;
DenOp.ProjectBoundaryBar(RinvP_Phi); // Correct to here
// R^-dagger R^-1 P term
DenOp.DinverseDag(RinvP_Phi,DidRinvP_Phi); // Vector D
DenOp.dBoundaryBarDag(DidRinvP_Phi,DdbdDidRinvP_Phi);
RinvDagRinvP_Phi = RinvP_Phi - DdbdDidRinvP_Phi;
DenOp.ProjectBoundaryBar(RinvDagRinvP_Phi);
// P^dag R^-dagger R^-1 P term
NumOp.dOmegaDagInv(RinvDagRinvP_Phi,DoidRinvDagRinvP_Phi); // Vector E
NumOp.dBoundaryDag(DoidRinvDagRinvP_Phi,tmp);
NumOp.dOmegaBarDagInv(tmp,DobidDddDoidRinvDagRinvP_Phi); // Vector F
NumOp.dBoundaryBarDag(DobidDddDoidRinvDagRinvP_Phi,tmp);
PdagRinvDagRinvP_Phi = RinvDagRinvP_Phi- tmp;
NumOp.ProjectBoundaryBar(PdagRinvDagRinvP_Phi);
/*
std::cout << "S eval "<< action << std::endl;
std::cout << "S - IP1 "<< innerProduct(Phi,PdagRinvDagRinvP_Phi) << std::endl;
std::cout << "S - IP2 "<< norm2(RinvP_Phi) << std::endl;
NumOp.R(Phi,tmp);
tmp = tmp - P_Phi;
std::cout << "diff1 "<<norm2(tmp) <<std::endl;
DenOp.RInv(P_Phi,tmp);
tmp = tmp - RinvP_Phi;
std::cout << "diff2 "<<norm2(tmp) <<std::endl;
DenOp.RDagInv(RinvP_Phi,tmp);
tmp = tmp - RinvDagRinvP_Phi;
std::cout << "diff3 "<<norm2(tmp) <<std::endl;
DenOp.RDag(RinvDagRinvP_Phi,tmp);
tmp = tmp - PdagRinvDagRinvP_Phi;
std::cout << "diff4 "<<norm2(tmp) <<std::endl;
*/
dSdU=Zero();
X = DobiDdbPhi;
Y = DobidDddDoidRinvDagRinvP_Phi;
NumOp.DirichletFermOpD.MDeriv(force,Y,X,DaggerNo); dSdU=dSdU+force;
NumOp.DirichletFermOpD.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU+force;
X = DoiDdDobiDdbPhi;
Y = DoidRinvDagRinvP_Phi;
NumOp.DirichletFermOpD.MDeriv(force,Y,X,DaggerNo); dSdU=dSdU+force;
NumOp.DirichletFermOpD.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU+force;
X = DiDdbP_Phi;
Y = DidRinvP_Phi;
DenOp.PeriodicFermOpD.MDeriv(force,Y,X,DaggerNo); dSdU=dSdU+force;
DenOp.PeriodicFermOpD.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU+force;
dSdU *= -1.0;
};
};
NAMESPACE_END(Grid);

View File

@ -44,10 +44,6 @@ NAMESPACE_BEGIN(Grid);
// Exact one flavour implementation of DWF determinant ratio //
///////////////////////////////////////////////////////////////
//Note: using mixed prec CG for the heatbath solver in this action class will not work
// because the L, R operators must have their shift coefficients updated throughout the heatbath step
// You will find that the heatbath solver simply won't converge.
// To use mixed precision here use the ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction variant below
template<class Impl>
class ExactOneFlavourRatioPseudoFermionAction : public Action<typename Impl::GaugeField>
{
@ -61,60 +57,37 @@ NAMESPACE_BEGIN(Grid);
bool use_heatbath_forecasting;
AbstractEOFAFermion<Impl>& Lop; // the basic LH operator
AbstractEOFAFermion<Impl>& Rop; // the basic RH operator
SchurRedBlackDiagMooeeSolve<FermionField> SolverHBL;
SchurRedBlackDiagMooeeSolve<FermionField> SolverHBR;
SchurRedBlackDiagMooeeSolve<FermionField> SolverHB;
SchurRedBlackDiagMooeeSolve<FermionField> SolverL;
SchurRedBlackDiagMooeeSolve<FermionField> SolverR;
SchurRedBlackDiagMooeeSolve<FermionField> DerivativeSolverL;
SchurRedBlackDiagMooeeSolve<FermionField> DerivativeSolverR;
FermionField Phi; // the pseudofermion field for this trajectory
RealD norm2_eta; //|eta|^2 where eta is the random gaussian field used to generate the pseudofermion field
bool initial_action; //true for the first call to S after refresh, for which the identity S = |eta|^2 holds provided the rational approx is good
public:
//Used in the heatbath, refresh the shift coefficients of the L (LorR=0) or R (LorR=1) operator
virtual void heatbathRefreshShiftCoefficients(int LorR, RealD to){
AbstractEOFAFermion<Impl>&op = LorR == 0 ? Lop : Rop;
op.RefreshShiftCoefficients(to);
}
//Use the same solver for L,R in all cases
ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop,
AbstractEOFAFermion<Impl>& _Rop,
OperatorFunction<FermionField>& CG,
Params& p,
bool use_fc=false)
: ExactOneFlavourRatioPseudoFermionAction(_Lop,_Rop,CG,CG,CG,CG,CG,CG,p,use_fc) {};
//Use the same solver for L,R in the heatbath but different solvers elsewhere
: ExactOneFlavourRatioPseudoFermionAction(_Lop,_Rop,CG,CG,CG,CG,CG,p,use_fc) {};
ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop,
AbstractEOFAFermion<Impl>& _Rop,
OperatorFunction<FermionField>& HeatbathCG,
OperatorFunction<FermionField>& ActionCGL, OperatorFunction<FermionField>& ActionCGR,
OperatorFunction<FermionField>& DerivCGL , OperatorFunction<FermionField>& DerivCGR,
Params& p,
bool use_fc=false)
: ExactOneFlavourRatioPseudoFermionAction(_Lop,_Rop,HeatbathCG,HeatbathCG, ActionCGL, ActionCGR, DerivCGL,DerivCGR,p,use_fc) {};
//Use different solvers for L,R in all cases
ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop,
AbstractEOFAFermion<Impl>& _Rop,
OperatorFunction<FermionField>& HeatbathCGL, OperatorFunction<FermionField>& HeatbathCGR,
OperatorFunction<FermionField>& HeatbathCG,
OperatorFunction<FermionField>& ActionCGL, OperatorFunction<FermionField>& ActionCGR,
OperatorFunction<FermionField>& DerivCGL , OperatorFunction<FermionField>& DerivCGR,
Params& p,
bool use_fc=false) :
Lop(_Lop),
Rop(_Rop),
SolverHBL(HeatbathCGL,false,true), SolverHBR(HeatbathCGR,false,true),
SolverHB(HeatbathCG,false,true),
SolverL(ActionCGL, false, true), SolverR(ActionCGR, false, true),
DerivativeSolverL(DerivCGL, false, true), DerivativeSolverR(DerivCGR, false, true),
Phi(_Lop.FermionGrid()),
param(p),
use_heatbath_forecasting(use_fc),
initial_action(false)
use_heatbath_forecasting(use_fc)
{
AlgRemez remez(param.lo, param.hi, param.precision);
@ -124,8 +97,6 @@ NAMESPACE_BEGIN(Grid);
PowerNegHalf.Init(remez, param.tolerance, true);
};
const FermionField &getPhi() const{ return Phi; }
virtual std::string action_name() { return "ExactOneFlavourRatioPseudoFermionAction"; }
virtual std::string LogParameters() {
@ -146,19 +117,6 @@ NAMESPACE_BEGIN(Grid);
else{ for(int s=0; s<Ls; ++s){ axpby_ssp_pminus(out, 0.0, in, 1.0, in, s, s); } }
}
virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) {
// P(eta_o) = e^{- eta_o^dag eta_o}
//
// e^{x^2/2 sig^2} => sig^2 = 0.5.
//
RealD scale = std::sqrt(0.5);
FermionField eta (Lop.FermionGrid());
gaussian(pRNG,eta); eta = eta * scale;
refresh(U,eta);
}
// EOFA heatbath: see Eqn. (29) of arXiv:1706.05843
// We generate a Gaussian noise vector \eta, and then compute
// \Phi = M_{\rm EOFA}^{-1/2} * \eta
@ -166,10 +124,12 @@ NAMESPACE_BEGIN(Grid);
//
// As a check of rational require \Phi^dag M_{EOFA} \Phi == eta^dag M^-1/2^dag M M^-1/2 eta = eta^dag eta
//
void refresh(const GaugeField &U, const FermionField &eta) {
virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG)
{
Lop.ImportGauge(U);
Rop.ImportGauge(U);
FermionField eta (Lop.FermionGrid());
FermionField CG_src (Lop.FermionGrid());
FermionField CG_soln (Lop.FermionGrid());
FermionField Forecast_src(Lop.FermionGrid());
@ -180,6 +140,11 @@ NAMESPACE_BEGIN(Grid);
if(use_heatbath_forecasting){ prev_solns.reserve(param.degree); }
ChronoForecast<AbstractEOFAFermion<Impl>, FermionField> Forecast;
// Seed with Gaussian noise vector (var = 0.5)
RealD scale = std::sqrt(0.5);
gaussian(pRNG,eta);
eta = eta * scale;
// \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta
RealD N(PowerNegHalf.norm);
for(int k=0; k<param.degree; ++k){ N += PowerNegHalf.residues[k] / ( 1.0 + PowerNegHalf.poles[k] ); }
@ -195,15 +160,15 @@ NAMESPACE_BEGIN(Grid);
tmp[1] = Zero();
for(int k=0; k<param.degree; ++k){
gamma_l = 1.0 / ( 1.0 + PowerNegHalf.poles[k] );
heatbathRefreshShiftCoefficients(0, -gamma_l);
Lop.RefreshShiftCoefficients(-gamma_l);
if(use_heatbath_forecasting){ // Forecast CG guess using solutions from previous poles
Lop.Mdag(CG_src, Forecast_src);
CG_soln = Forecast(Lop, Forecast_src, prev_solns);
SolverHBL(Lop, CG_src, CG_soln);
SolverHB(Lop, CG_src, CG_soln);
prev_solns.push_back(CG_soln);
} else {
CG_soln = Zero(); // Just use zero as the initial guess
SolverHBL(Lop, CG_src, CG_soln);
SolverHB(Lop, CG_src, CG_soln);
}
Lop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
tmp[1] = tmp[1] + ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Lop.k ) * tmp[0];
@ -222,15 +187,15 @@ NAMESPACE_BEGIN(Grid);
if(use_heatbath_forecasting){ prev_solns.clear(); } // empirically, LH solns don't help for RH solves
for(int k=0; k<param.degree; ++k){
gamma_l = 1.0 / ( 1.0 + PowerNegHalf.poles[k] );
heatbathRefreshShiftCoefficients(1, -gamma_l*PowerNegHalf.poles[k]);
Rop.RefreshShiftCoefficients(-gamma_l*PowerNegHalf.poles[k]);
if(use_heatbath_forecasting){
Rop.Mdag(CG_src, Forecast_src);
CG_soln = Forecast(Rop, Forecast_src, prev_solns);
SolverHBR(Rop, CG_src, CG_soln);
SolverHB(Rop, CG_src, CG_soln);
prev_solns.push_back(CG_soln);
} else {
CG_soln = Zero();
SolverHBR(Rop, CG_src, CG_soln);
SolverHB(Rop, CG_src, CG_soln);
}
Rop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
tmp[1] = tmp[1] - ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Rop.k ) * tmp[0];
@ -240,117 +205,49 @@ NAMESPACE_BEGIN(Grid);
Phi = Phi + tmp[1];
// Reset shift coefficients for energy and force evals
heatbathRefreshShiftCoefficients(0, 0.0);
heatbathRefreshShiftCoefficients(1, -1.0);
//Mark that the next call to S is the first after refresh
initial_action = true;
Lop.RefreshShiftCoefficients(0.0);
Rop.RefreshShiftCoefficients(-1.0);
// Bounds check
RealD EtaDagEta = norm2(eta);
norm2_eta = EtaDagEta;
// RealD PhiDagMPhi= norm2(eta);
};
void Meofa(const GaugeField& U,const FermionField &in, FermionField & out)
void Meofa(const GaugeField& U,const FermionField &phi, FermionField & Mphi)
{
#if 0
Lop.ImportGauge(U);
Rop.ImportGauge(U);
FermionField spProj_in(Lop.FermionGrid());
FermionField spProj_Phi(Lop.FermionGrid());
FermionField mPhi(Lop.FermionGrid());
std::vector<FermionField> tmp(2, Lop.FermionGrid());
out = in;
mPhi = phi;
// LH term: S = S - k <\Phi| P_{-} \Omega_{-}^{\dagger} H(mf)^{-1} \Omega_{-} P_{-} |\Phi>
spProj(in, spProj_in, -1, Lop.Ls);
Lop.Omega(spProj_in, tmp[0], -1, 0);
spProj(Phi, spProj_Phi, -1, Lop.Ls);
Lop.Omega(spProj_Phi, tmp[0], -1, 0);
G5R5(tmp[1], tmp[0]);
tmp[0] = Zero();
SolverL(Lop, tmp[1], tmp[0]);
Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back
Lop.Omega(tmp[1], tmp[0], -1, 1);
spProj(tmp[0], tmp[1], -1, Lop.Ls);
out = out - Lop.k * tmp[1];
mPhi = mPhi - Lop.k * innerProduct(spProj_Phi, tmp[0]).real();
// RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb)
// - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} |\Phi>
spProj(in, spProj_in, 1, Rop.Ls);
Rop.Omega(spProj_in, tmp[0], 1, 0);
// - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{-} P_{-} |\Phi>
spProj(Phi, spProj_Phi, 1, Rop.Ls);
Rop.Omega(spProj_Phi, tmp[0], 1, 0);
G5R5(tmp[1], tmp[0]);
tmp[0] = Zero();
SolverR(Rop, tmp[1], tmp[0]);
Rop.Dtilde(tmp[0], tmp[1]);
Rop.Omega(tmp[1], tmp[0], 1, 1);
spProj(tmp[0], tmp[1], 1, Rop.Ls);
out = out + Rop.k * tmp[1];
action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real();
#endif
}
//Due to the structure of EOFA, it is no more expensive to compute the inverse of Meofa
//To ensure correctness we can simply reuse the heatbath code but use the rational approx
//f(x) = 1/x which corresponds to alpha_0=0, alpha_1=1, beta_1=0 => gamma_1=1
void MeofaInv(const GaugeField &U, const FermionField &in, FermionField &out) {
Lop.ImportGauge(U);
Rop.ImportGauge(U);
FermionField CG_src (Lop.FermionGrid());
FermionField CG_soln (Lop.FermionGrid());
std::vector<FermionField> tmp(2, Lop.FermionGrid());
// \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta
// = 1 * \eta
out = in;
// LH terms:
// \Phi = \Phi + k \sum_{k=1}^{N_{p}} P_{-} \Omega_{-}^{\dagger} ( H(mf)
// - \gamma_{l} \Delta_{-}(mf,mb) P_{-} )^{-1} \Omega_{-} P_{-} \eta
spProj(in, tmp[0], -1, Lop.Ls);
Lop.Omega(tmp[0], tmp[1], -1, 0);
G5R5(CG_src, tmp[1]);
{
heatbathRefreshShiftCoefficients(0, -1.); //-gamma_1 = -1.
CG_soln = Zero(); // Just use zero as the initial guess
SolverHBL(Lop, CG_src, CG_soln);
Lop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
tmp[1] = Lop.k * tmp[0];
}
Lop.Omega(tmp[1], tmp[0], -1, 1);
spProj(tmp[0], tmp[1], -1, Lop.Ls);
out = out + tmp[1];
// RH terms:
// \Phi = \Phi - k \sum_{k=1}^{N_{p}} P_{+} \Omega_{+}^{\dagger} ( H(mb)
// - \beta_l\gamma_{l} \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} \eta
spProj(in, tmp[0], 1, Rop.Ls);
Rop.Omega(tmp[0], tmp[1], 1, 0);
G5R5(CG_src, tmp[1]);
{
heatbathRefreshShiftCoefficients(1, 0.); //-gamma_1 * beta_1 = 0
CG_soln = Zero();
SolverHBR(Rop, CG_src, CG_soln);
Rop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
tmp[1] = - Rop.k * tmp[0];
}
Rop.Omega(tmp[1], tmp[0], 1, 1);
spProj(tmp[0], tmp[1], 1, Rop.Ls);
out = out + tmp[1];
// Reset shift coefficients for energy and force evals
heatbathRefreshShiftCoefficients(0, 0.0);
heatbathRefreshShiftCoefficients(1, -1.0);
};
// EOFA action: see Eqn. (10) of arXiv:1706.05843
virtual RealD S(const GaugeField& U)
{
@ -374,7 +271,7 @@ NAMESPACE_BEGIN(Grid);
action -= Lop.k * innerProduct(spProj_Phi, tmp[0]).real();
// RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb)
// - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} |\Phi>
// - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{-} P_{-} |\Phi>
spProj(Phi, spProj_Phi, 1, Rop.Ls);
Rop.Omega(spProj_Phi, tmp[0], 1, 0);
G5R5(tmp[1], tmp[0]);
@ -384,26 +281,6 @@ NAMESPACE_BEGIN(Grid);
Rop.Omega(tmp[1], tmp[0], 1, 1);
action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real();
if(initial_action){
//For the first call to S after refresh, S = |eta|^2. We can use this to ensure the rational approx is good
RealD diff = action - norm2_eta;
//S_init = eta^dag M^{-1/2} M M^{-1/2} eta
//S_init - eta^dag eta = eta^dag ( M^{-1/2} M M^{-1/2} - 1 ) eta
//If approximate solution
//S_init - eta^dag eta = eta^dag ( [M^{-1/2}+\delta M^{-1/2}] M [M^{-1/2}+\delta M^{-1/2}] - 1 ) eta
// \approx eta^dag ( \delta M^{-1/2} M^{1/2} + M^{1/2}\delta M^{-1/2} ) eta
// We divide out |eta|^2 to remove source scaling but the tolerance on this check should still be somewhat higher than the actual approx tolerance
RealD test = fabs(diff)/norm2_eta; //test the quality of the rational approx
std::cout << GridLogMessage << action_name() << " initial action " << action << " expect " << norm2_eta << "; diff " << diff << std::endl;
std::cout << GridLogMessage << action_name() << "[ eta^dag ( M^{-1/2} M M^{-1/2} - 1 ) eta ]/|eta^2| = " << test << " expect 0 (tol " << param.BoundsCheckTol << ")" << std::endl;
assert( ( test < param.BoundsCheckTol ) && " Initial action check failed" );
initial_action = false;
}
return action;
};
@ -452,40 +329,6 @@ NAMESPACE_BEGIN(Grid);
};
};
template<class ImplD, class ImplF>
class ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction : public ExactOneFlavourRatioPseudoFermionAction<ImplD>{
public:
INHERIT_IMPL_TYPES(ImplD);
typedef OneFlavourRationalParams Params;
private:
AbstractEOFAFermion<ImplF>& LopF; // the basic LH operator
AbstractEOFAFermion<ImplF>& RopF; // the basic RH operator
public:
virtual std::string action_name() { return "ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction"; }
//Used in the heatbath, refresh the shift coefficients of the L (LorR=0) or R (LorR=1) operator
virtual void heatbathRefreshShiftCoefficients(int LorR, RealD to){
AbstractEOFAFermion<ImplF> &op = LorR == 0 ? LopF : RopF;
op.RefreshShiftCoefficients(to);
this->ExactOneFlavourRatioPseudoFermionAction<ImplD>::heatbathRefreshShiftCoefficients(LorR,to);
}
ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction(AbstractEOFAFermion<ImplF>& _LopF,
AbstractEOFAFermion<ImplF>& _RopF,
AbstractEOFAFermion<ImplD>& _LopD,
AbstractEOFAFermion<ImplD>& _RopD,
OperatorFunction<FermionField>& HeatbathCGL, OperatorFunction<FermionField>& HeatbathCGR,
OperatorFunction<FermionField>& ActionCGL, OperatorFunction<FermionField>& ActionCGR,
OperatorFunction<FermionField>& DerivCGL , OperatorFunction<FermionField>& DerivCGR,
Params& p,
bool use_fc=false) :
LopF(_LopF), RopF(_RopF), ExactOneFlavourRatioPseudoFermionAction<ImplD>(_LopD, _RopD, HeatbathCGL, HeatbathCGR, ActionCGL, ActionCGR, DerivCGL, DerivCGR, p, use_fc){}
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,372 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/pseudofermion/GeneralEvenOddRationalRatio.h
Copyright (C) 2015
Author: Christopher Kelly <ckelly@bnl.gov>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_H
#define QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_H
NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////
// Generic rational approximation for ratios of operators
/////////////////////////////////////////////////////////
/* S_f = -log( det( [M^dag M]/[V^dag V] )^{1/inv_pow} )
= chi^dag ( [M^dag M]/[V^dag V] )^{-1/inv_pow} chi\
= chi^dag ( [V^dag V]^{-1/2} [M^dag M] [V^dag V]^{-1/2} )^{-1/inv_pow} chi\
= chi^dag [V^dag V]^{1/(2*inv_pow)} [M^dag M]^{-1/inv_pow} [V^dag V]^{1/(2*inv_pow)} chi\
S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
BIG WARNING:
Here V^dag V is referred to in this code as the "numerator" operator and M^dag M is the *denominator* operator.
this refers to their position in the pseudofermion action, which is the *inverse* of what appears in the determinant
Thus for DWF the numerator operator is the Pauli-Villars operator
Here P/Q \sim R_{1/(2*inv_pow)} ~ (V^dagV)^{1/(2*inv_pow)}
Here N/D \sim R_{-1/inv_pow} ~ (M^dagM)^{-1/inv_pow}
*/
template<class Impl>
class GeneralEvenOddRatioRationalPseudoFermionAction : public Action<typename Impl::GaugeField> {
public:
INHERIT_IMPL_TYPES(Impl);
typedef RationalActionParams Params;
Params param;
//For action evaluation
MultiShiftFunction ApproxPowerAction ; //rational approx for X^{1/inv_pow}
MultiShiftFunction ApproxNegPowerAction; //rational approx for X^{-1/inv_pow}
MultiShiftFunction ApproxHalfPowerAction; //rational approx for X^{1/(2*inv_pow)}
MultiShiftFunction ApproxNegHalfPowerAction; //rational approx for X^{-1/(2*inv_pow)}
//For the MD integration
MultiShiftFunction ApproxPowerMD ; //rational approx for X^{1/inv_pow}
MultiShiftFunction ApproxNegPowerMD; //rational approx for X^{-1/inv_pow}
MultiShiftFunction ApproxHalfPowerMD; //rational approx for X^{1/(2*inv_pow)}
MultiShiftFunction ApproxNegHalfPowerMD; //rational approx for X^{-1/(2*inv_pow)}
private:
FermionOperator<Impl> & NumOp;// the basic operator
FermionOperator<Impl> & DenOp;// the basic operator
FermionField PhiEven; // the pseudo fermion field for this trajectory
FermionField PhiOdd; // the pseudo fermion field for this trajectory
//Generate the approximation to x^{1/inv_pow} (->approx) and x^{-1/inv_pow} (-> approx_inv) by an approx_degree degree rational approximation
//CG_tolerance is used to issue a warning if the approximation error is larger than the tolerance of the CG and is otherwise just stored in the MultiShiftFunction for use by the multi-shift
static void generateApprox(MultiShiftFunction &approx, MultiShiftFunction &approx_inv, int inv_pow, int approx_degree, double CG_tolerance, AlgRemez &remez){
std::cout<<GridLogMessage << "Generating degree "<< approx_degree<<" approximation for x^(1/" << inv_pow << ")"<<std::endl;
double error = remez.generateApprox(approx_degree,1,inv_pow);
if(error > CG_tolerance)
std::cout<<GridLogMessage << "WARNING: Remez approximation has a larger error " << error << " than the CG tolerance " << CG_tolerance << "! Try increasing the number of poles" << std::endl;
approx.Init(remez, CG_tolerance,false);
approx_inv.Init(remez, CG_tolerance,true);
}
protected:
static constexpr bool Numerator = true;
static constexpr bool Denominator = false;
//Allow derived classes to override the multishift CG
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionField &in, FermionField &out){
SchurDifferentiableOperator<Impl> schurOp(numerator ? NumOp : DenOp);
ConjugateGradientMultiShift<FermionField> msCG(MaxIter, approx);
msCG(schurOp,in, out);
}
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionField &in, std::vector<FermionField> &out_elems, FermionField &out){
SchurDifferentiableOperator<Impl> schurOp(numerator ? NumOp : DenOp);
ConjugateGradientMultiShift<FermionField> msCG(MaxIter, approx);
msCG(schurOp,in, out_elems, out);
}
//Allow derived classes to override the gauge import
virtual void ImportGauge(const GaugeField &U){
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
}
public:
GeneralEvenOddRatioRationalPseudoFermionAction(FermionOperator<Impl> &_NumOp,
FermionOperator<Impl> &_DenOp,
const Params & p
) :
NumOp(_NumOp),
DenOp(_DenOp),
PhiOdd (_NumOp.FermionRedBlackGrid()),
PhiEven(_NumOp.FermionRedBlackGrid()),
param(p)
{
std::cout<<GridLogMessage << action_name() << " initialize: starting" << std::endl;
AlgRemez remez(param.lo,param.hi,param.precision);
//Generate approximations for action eval
generateApprox(ApproxPowerAction, ApproxNegPowerAction, param.inv_pow, param.action_degree, param.action_tolerance, remez);
generateApprox(ApproxHalfPowerAction, ApproxNegHalfPowerAction, 2*param.inv_pow, param.action_degree, param.action_tolerance, remez);
//Generate approximations for MD
if(param.md_degree != param.action_degree){ //note the CG tolerance is unrelated to the stopping condition of the Remez algorithm
generateApprox(ApproxPowerMD, ApproxNegPowerMD, param.inv_pow, param.md_degree, param.md_tolerance, remez);
generateApprox(ApproxHalfPowerMD, ApproxNegHalfPowerMD, 2*param.inv_pow, param.md_degree, param.md_tolerance, remez);
}else{
std::cout<<GridLogMessage << "Using same rational approximations for MD as for action evaluation" << std::endl;
ApproxPowerMD = ApproxPowerAction;
ApproxNegPowerMD = ApproxNegPowerAction;
for(int i=0;i<ApproxPowerMD.tolerances.size();i++)
ApproxNegPowerMD.tolerances[i] = ApproxPowerMD.tolerances[i] = param.md_tolerance; //used for multishift
ApproxHalfPowerMD = ApproxHalfPowerAction;
ApproxNegHalfPowerMD = ApproxNegHalfPowerAction;
for(int i=0;i<ApproxPowerMD.tolerances.size();i++)
ApproxNegHalfPowerMD.tolerances[i] = ApproxHalfPowerMD.tolerances[i] = param.md_tolerance;
}
std::cout<<GridLogMessage << action_name() << " initialize: complete" << std::endl;
};
virtual std::string action_name(){return "GeneralEvenOddRatioRationalPseudoFermionAction";}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "["<<action_name()<<"] Power : 1/" << param.inv_pow << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Low :" << param.lo << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] High :" << param.hi << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Max iterations :" << param.MaxIter << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Tolerance (Action) :" << param.action_tolerance << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Degree (Action) :" << param.action_degree << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Tolerance (MD) :" << param.md_tolerance << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Degree (MD) :" << param.md_degree << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Precision :" << param.precision << std::endl;
return sstream.str();
}
//Access the fermion field
const FermionField &getPhiOdd() const{ return PhiOdd; }
virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) {
std::cout<<GridLogMessage << action_name() << " refresh: starting" << std::endl;
FermionField eta(NumOp.FermionGrid());
// P(eta) \propto e^{- eta^dag eta}
//
// The gaussian function draws from P(x) \propto e^{- x^2 / 2 } [i.e. sigma=1]
// Thus eta = x/sqrt{2} = x * sqrt(1/2)
RealD scale = std::sqrt(0.5);
gaussian(pRNG,eta); eta=eta*scale;
refresh(U,eta);
}
//Allow for manual specification of random field for testing
void refresh(const GaugeField &U, const FermionField &eta) {
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
//
// P(phi) = e^{- phi^dag (VdagV)^1/(2*inv_pow) (MdagM)^-1/inv_pow (VdagV)^1/(2*inv_pow) phi}
// = e^{- phi^dag (VdagV)^1/(2*inv_pow) (MdagM)^-1/(2*inv_pow) (MdagM)^-1/(2*inv_pow) (VdagV)^1/(2*inv_pow) phi}
//
// Phi = (VdagV)^-1/(2*inv_pow) Mdag^{1/(2*inv_pow)} eta
std::cout<<GridLogMessage << action_name() << " refresh: starting" << std::endl;
FermionField etaOdd (NumOp.FermionRedBlackGrid());
FermionField etaEven(NumOp.FermionRedBlackGrid());
FermionField tmp(NumOp.FermionRedBlackGrid());
pickCheckerboard(Even,etaEven,eta);
pickCheckerboard(Odd,etaOdd,eta);
ImportGauge(U);
// MdagM^1/(2*inv_pow) eta
std::cout<<GridLogMessage << action_name() << " refresh: doing (M^dag M)^{1/" << 2*param.inv_pow << "} eta" << std::endl;
multiShiftInverse(Denominator, ApproxHalfPowerAction, param.MaxIter, etaOdd, tmp);
// VdagV^-1/(2*inv_pow) MdagM^1/(2*inv_pow) eta
std::cout<<GridLogMessage << action_name() << " refresh: doing (V^dag V)^{-1/" << 2*param.inv_pow << "} ( (M^dag M)^{1/" << 2*param.inv_pow << "} eta)" << std::endl;
multiShiftInverse(Numerator, ApproxNegHalfPowerAction, param.MaxIter, tmp, PhiOdd);
assert(NumOp.ConstEE() == 1);
assert(DenOp.ConstEE() == 1);
PhiEven = Zero();
std::cout<<GridLogMessage << action_name() << " refresh: starting" << std::endl;
};
//////////////////////////////////////////////////////
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
//////////////////////////////////////////////////////
virtual RealD S(const GaugeField &U) {
std::cout<<GridLogMessage << action_name() << " compute action: starting" << std::endl;
ImportGauge(U);
FermionField X(NumOp.FermionRedBlackGrid());
FermionField Y(NumOp.FermionRedBlackGrid());
// VdagV^1/(2*inv_pow) Phi
std::cout<<GridLogMessage << action_name() << " compute action: doing (V^dag V)^{1/" << 2*param.inv_pow << "} Phi" << std::endl;
multiShiftInverse(Numerator, ApproxHalfPowerAction, param.MaxIter, PhiOdd,X);
// MdagM^-1/(2*inv_pow) VdagV^1/(2*inv_pow) Phi
std::cout<<GridLogMessage << action_name() << " compute action: doing (M^dag M)^{-1/" << 2*param.inv_pow << "} ( (V^dag V)^{1/" << 2*param.inv_pow << "} Phi)" << std::endl;
multiShiftInverse(Denominator, ApproxNegHalfPowerAction, param.MaxIter, X,Y);
// Randomly apply rational bounds checks.
int rcheck = rand();
auto grid = NumOp.FermionGrid();
auto r=rand();
grid->Broadcast(0,r);
if ( param.BoundsCheckFreq != 0 && (r % param.BoundsCheckFreq)==0 ) {
std::cout<<GridLogMessage << action_name() << " compute action: doing bounds check" << std::endl;
FermionField gauss(NumOp.FermionRedBlackGrid());
gauss = PhiOdd;
SchurDifferentiableOperator<Impl> MdagM(DenOp);
std::cout<<GridLogMessage << action_name() << " compute action: checking high bounds" << std::endl;
HighBoundCheck(MdagM,gauss,param.hi);
std::cout<<GridLogMessage << action_name() << " compute action: full approximation" << std::endl;
InversePowerBoundsCheck(param.inv_pow,param.MaxIter,param.action_tolerance*100,MdagM,gauss,ApproxNegPowerAction);
std::cout<<GridLogMessage << action_name() << " compute action: bounds check complete" << std::endl;
}
// Phidag VdagV^1/(2*inv_pow) MdagM^-1/(2*inv_pow) MdagM^-1/(2*inv_pow) VdagV^1/(2*inv_pow) Phi
RealD action = norm2(Y);
std::cout<<GridLogMessage << action_name() << " compute action: complete" << std::endl;
return action;
};
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
//
// Here, M is some 5D operator and V is the Pauli-Villars field
// N and D makeup the rat. poly of the M term and P and & makeup the rat.poly of the denom term
//
// Need
// dS_f/dU = chi^dag d[P/Q] N/D P/Q chi
// + chi^dag P/Q d[N/D] P/Q chi
// + chi^dag P/Q N/D d[P/Q] chi
//
// P/Q is expressed as partial fraction expansion:
//
// a0 + \sum_k ak/(V^dagV + bk)
//
// d[P/Q] is then
//
// \sum_k -ak [V^dagV+bk]^{-1} [ dV^dag V + V^dag dV ] [V^dag V + bk]^{-1}
//
// and similar for N/D.
//
// Need
// MpvPhi_k = [Vdag V + bk]^{-1} chi
// MpvPhi = {a0 + \sum_k ak [Vdag V + bk]^{-1} }chi
//
// MfMpvPhi_k = [MdagM+bk]^{-1} MpvPhi
// MfMpvPhi = {a0 + \sum_k ak [Mdag M + bk]^{-1} } MpvPhi
//
// MpvMfMpvPhi_k = [Vdag V + bk]^{-1} MfMpvchi
//
virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
std::cout<<GridLogMessage << action_name() << " deriv: starting" << std::endl;
const int n_f = ApproxNegPowerMD.poles.size();
const int n_pv = ApproxHalfPowerMD.poles.size();
std::vector<FermionField> MpvPhi_k (n_pv,NumOp.FermionRedBlackGrid());
std::vector<FermionField> MpvMfMpvPhi_k(n_pv,NumOp.FermionRedBlackGrid());
std::vector<FermionField> MfMpvPhi_k (n_f ,NumOp.FermionRedBlackGrid());
FermionField MpvPhi(NumOp.FermionRedBlackGrid());
FermionField MfMpvPhi(NumOp.FermionRedBlackGrid());
FermionField MpvMfMpvPhi(NumOp.FermionRedBlackGrid());
FermionField Y(NumOp.FermionRedBlackGrid());
GaugeField tmp(NumOp.GaugeGrid());
ImportGauge(U);
std::cout<<GridLogMessage << action_name() << " deriv: doing (V^dag V)^{1/" << 2*param.inv_pow << "} Phi" << std::endl;
multiShiftInverse(Numerator, ApproxHalfPowerMD, param.MaxIter, PhiOdd,MpvPhi_k,MpvPhi);
std::cout<<GridLogMessage << action_name() << " deriv: doing (M^dag M)^{-1/" << param.inv_pow << "} ( (V^dag V)^{1/" << 2*param.inv_pow << "} Phi)" << std::endl;
multiShiftInverse(Denominator, ApproxNegPowerMD, param.MaxIter, MpvPhi,MfMpvPhi_k,MfMpvPhi);
std::cout<<GridLogMessage << action_name() << " deriv: doing (V^dag V)^{1/" << 2*param.inv_pow << "} ( (M^dag M)^{-1/" << param.inv_pow << "} (V^dag V)^{1/" << 2*param.inv_pow << "} Phi)" << std::endl;
multiShiftInverse(Numerator, ApproxHalfPowerMD, param.MaxIter, MfMpvPhi,MpvMfMpvPhi_k,MpvMfMpvPhi);
SchurDifferentiableOperator<Impl> MdagM(DenOp);
SchurDifferentiableOperator<Impl> VdagV(NumOp);
RealD ak;
dSdU = Zero();
// With these building blocks
//
// dS/dU =
// \sum_k -ak MfMpvPhi_k^dag [ dM^dag M + M^dag dM ] MfMpvPhi_k (1)
// + \sum_k -ak MpvMfMpvPhi_k^\dag [ dV^dag V + V^dag dV ] MpvPhi_k (2)
// -ak MpvPhi_k^dag [ dV^dag V + V^dag dV ] MpvMfMpvPhi_k (3)
//(1)
std::cout<<GridLogMessage << action_name() << " deriv: doing dS/dU part (1)" << std::endl;
for(int k=0;k<n_f;k++){
ak = ApproxNegPowerMD.residues[k];
MdagM.Mpc(MfMpvPhi_k[k],Y);
MdagM.MpcDagDeriv(tmp , MfMpvPhi_k[k], Y ); dSdU=dSdU+ak*tmp;
MdagM.MpcDeriv(tmp , Y, MfMpvPhi_k[k] ); dSdU=dSdU+ak*tmp;
}
//(2)
//(3)
std::cout<<GridLogMessage << action_name() << " deriv: doing dS/dU part (2)+(3)" << std::endl;
for(int k=0;k<n_pv;k++){
ak = ApproxHalfPowerMD.residues[k];
VdagV.Mpc(MpvPhi_k[k],Y);
VdagV.MpcDagDeriv(tmp,MpvMfMpvPhi_k[k],Y); dSdU=dSdU+ak*tmp;
VdagV.MpcDeriv (tmp,Y,MpvMfMpvPhi_k[k]); dSdU=dSdU+ak*tmp;
VdagV.Mpc(MpvMfMpvPhi_k[k],Y); // V as we take Ydag
VdagV.MpcDeriv (tmp,Y, MpvPhi_k[k]); dSdU=dSdU+ak*tmp;
VdagV.MpcDagDeriv(tmp,MpvPhi_k[k], Y); dSdU=dSdU+ak*tmp;
}
//dSdU = Ta(dSdU);
std::cout<<GridLogMessage << action_name() << " deriv: complete" << std::endl;
};
};
NAMESPACE_END(Grid);
#endif

View File

@ -1,93 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/pseudofermion/GeneralEvenOddRationalRatioMixedPrec.h
Copyright (C) 2015
Author: Christopher Kelly <ckelly@bnl.gov>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_MIXED_PREC_H
#define QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_MIXED_PREC_H
NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Generic rational approximation for ratios of operators utilizing the mixed precision multishift algorithm
// cf. GeneralEvenOddRational.h for details
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<class ImplD, class ImplF>
class GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction : public GeneralEvenOddRatioRationalPseudoFermionAction<ImplD> {
private:
typedef typename ImplD::FermionField FermionFieldD;
typedef typename ImplF::FermionField FermionFieldF;
FermionOperator<ImplD> & NumOpD;
FermionOperator<ImplD> & DenOpD;
FermionOperator<ImplF> & NumOpF;
FermionOperator<ImplF> & DenOpF;
Integer ReliableUpdateFreq;
protected:
//Allow derived classes to override the multishift CG
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, FermionFieldD &out){
SchurDifferentiableOperator<ImplD> schurOpD(numerator ? NumOpD : DenOpD);
SchurDifferentiableOperator<ImplF> schurOpF(numerator ? NumOpF : DenOpF);
ConjugateGradientMultiShiftMixedPrec<FermionFieldD, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
msCG(schurOpD, in, out);
}
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, std::vector<FermionFieldD> &out_elems, FermionFieldD &out){
SchurDifferentiableOperator<ImplD> schurOpD(numerator ? NumOpD : DenOpD);
SchurDifferentiableOperator<ImplF> schurOpF(numerator ? NumOpF : DenOpF);
ConjugateGradientMultiShiftMixedPrec<FermionFieldD, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
msCG(schurOpD, in, out_elems, out);
}
//Allow derived classes to override the gauge import
virtual void ImportGauge(const typename ImplD::GaugeField &Ud){
typename ImplF::GaugeField Uf(NumOpF.GaugeGrid());
precisionChange(Uf, Ud);
NumOpD.ImportGauge(Ud);
DenOpD.ImportGauge(Ud);
NumOpF.ImportGauge(Uf);
DenOpF.ImportGauge(Uf);
}
public:
GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction(FermionOperator<ImplD> &_NumOpD, FermionOperator<ImplD> &_DenOpD,
FermionOperator<ImplF> &_NumOpF, FermionOperator<ImplF> &_DenOpF,
const RationalActionParams & p, Integer _ReliableUpdateFreq
) : GeneralEvenOddRatioRationalPseudoFermionAction<ImplD>(_NumOpD, _DenOpD, p),
ReliableUpdateFreq(_ReliableUpdateFreq), NumOpD(_NumOpD), DenOpD(_DenOpD), NumOpF(_NumOpF), DenOpF(_DenOpF){}
virtual std::string action_name(){return "GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction";}
};
NAMESPACE_END(Grid);
#endif

View File

@ -40,31 +40,249 @@ NAMESPACE_BEGIN(Grid);
// Here N/D \sim R_{-1/2} ~ (M^dagM)^{-1/2}
template<class Impl>
class OneFlavourEvenOddRatioRationalPseudoFermionAction : public GeneralEvenOddRatioRationalPseudoFermionAction<Impl> {
class OneFlavourEvenOddRatioRationalPseudoFermionAction : public Action<typename Impl::GaugeField> {
public:
INHERIT_IMPL_TYPES(Impl);
typedef OneFlavourRationalParams Params;
Params param;
MultiShiftFunction PowerHalf ;
MultiShiftFunction PowerNegHalf;
MultiShiftFunction PowerQuarter;
MultiShiftFunction PowerNegQuarter;
private:
static RationalActionParams transcribe(const Params &in){
RationalActionParams out;
out.inv_pow = 2;
out.lo = in.lo;
out.hi = in.hi;
out.MaxIter = in.MaxIter;
out.action_tolerance = out.md_tolerance = in.tolerance;
out.action_degree = out.md_degree = in.degree;
out.precision = in.precision;
out.BoundsCheckFreq = in.BoundsCheckFreq;
return out;
}
FermionOperator<Impl> & NumOp;// the basic operator
FermionOperator<Impl> & DenOp;// the basic operator
FermionField PhiEven; // the pseudo fermion field for this trajectory
FermionField PhiOdd; // the pseudo fermion field for this trajectory
public:
OneFlavourEvenOddRatioRationalPseudoFermionAction(FermionOperator<Impl> &_NumOp,
FermionOperator<Impl> &_DenOp,
const Params & p
) :
GeneralEvenOddRatioRationalPseudoFermionAction<Impl>(_NumOp, _DenOp, transcribe(p)){}
virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}
OneFlavourEvenOddRatioRationalPseudoFermionAction(FermionOperator<Impl> &_NumOp,
FermionOperator<Impl> &_DenOp,
Params & p
) :
NumOp(_NumOp),
DenOp(_DenOp),
PhiOdd (_NumOp.FermionRedBlackGrid()),
PhiEven(_NumOp.FermionRedBlackGrid()),
param(p)
{
AlgRemez remez(param.lo,param.hi,param.precision);
// MdagM^(+- 1/2)
std::cout<<GridLogMessage << "Generating degree "<<param.degree<<" for x^(1/2)"<<std::endl;
remez.generateApprox(param.degree,1,2);
PowerHalf.Init(remez,param.tolerance,false);
PowerNegHalf.Init(remez,param.tolerance,true);
// MdagM^(+- 1/4)
std::cout<<GridLogMessage << "Generating degree "<<param.degree<<" for x^(1/4)"<<std::endl;
remez.generateApprox(param.degree,1,4);
PowerQuarter.Init(remez,param.tolerance,false);
PowerNegQuarter.Init(remez,param.tolerance,true);
};
virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "["<<action_name()<<"] Low :" << param.lo << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] High :" << param.hi << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Max iterations :" << param.MaxIter << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Tolerance :" << param.tolerance << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Degree :" << param.degree << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Precision :" << param.precision << std::endl;
return sstream.str();
}
virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) {
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
//
// P(phi) = e^{- phi^dag (VdagV)^1/4 (MdagM)^-1/2 (VdagV)^1/4 phi}
// = e^{- phi^dag (VdagV)^1/4 (MdagM)^-1/4 (MdagM)^-1/4 (VdagV)^1/4 phi}
//
// Phi = (VdagV)^-1/4 Mdag^{1/4} eta
//
// P(eta) = e^{- eta^dag eta}
//
// e^{x^2/2 sig^2} => sig^2 = 0.5.
//
// So eta should be of width sig = 1/sqrt(2).
RealD scale = std::sqrt(0.5);
FermionField eta(NumOp.FermionGrid());
FermionField etaOdd (NumOp.FermionRedBlackGrid());
FermionField etaEven(NumOp.FermionRedBlackGrid());
FermionField tmp(NumOp.FermionRedBlackGrid());
gaussian(pRNG,eta); eta=eta*scale;
pickCheckerboard(Even,etaEven,eta);
pickCheckerboard(Odd,etaOdd,eta);
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
// MdagM^1/4 eta
SchurDifferentiableOperator<Impl> MdagM(DenOp);
ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,PowerQuarter);
msCG_M(MdagM,etaOdd,tmp);
// VdagV^-1/4 MdagM^1/4 eta
SchurDifferentiableOperator<Impl> VdagV(NumOp);
ConjugateGradientMultiShift<FermionField> msCG_V(param.MaxIter,PowerNegQuarter);
msCG_V(VdagV,tmp,PhiOdd);
assert(NumOp.ConstEE() == 1);
assert(DenOp.ConstEE() == 1);
PhiEven = Zero();
};
//////////////////////////////////////////////////////
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
//////////////////////////////////////////////////////
virtual RealD S(const GaugeField &U) {
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
FermionField X(NumOp.FermionRedBlackGrid());
FermionField Y(NumOp.FermionRedBlackGrid());
// VdagV^1/4 Phi
SchurDifferentiableOperator<Impl> VdagV(NumOp);
ConjugateGradientMultiShift<FermionField> msCG_V(param.MaxIter,PowerQuarter);
msCG_V(VdagV,PhiOdd,X);
// MdagM^-1/4 VdagV^1/4 Phi
SchurDifferentiableOperator<Impl> MdagM(DenOp);
ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,PowerNegQuarter);
msCG_M(MdagM,X,Y);
// Randomly apply rational bounds checks.
auto grid = NumOp.FermionGrid();
auto r=rand();
grid->Broadcast(0,r);
if ( (r%param.BoundsCheckFreq)==0 ) {
FermionField gauss(NumOp.FermionRedBlackGrid());
gauss = PhiOdd;
HighBoundCheck(MdagM,gauss,param.hi);
InverseSqrtBoundsCheck(param.MaxIter,param.tolerance*100,MdagM,gauss,PowerNegHalf);
}
// Phidag VdagV^1/4 MdagM^-1/4 MdagM^-1/4 VdagV^1/4 Phi
RealD action = norm2(Y);
return action;
};
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
//
// Here, M is some 5D operator and V is the Pauli-Villars field
// N and D makeup the rat. poly of the M term and P and & makeup the rat.poly of the denom term
//
// Need
// dS_f/dU = chi^dag d[P/Q] N/D P/Q chi
// + chi^dag P/Q d[N/D] P/Q chi
// + chi^dag P/Q N/D d[P/Q] chi
//
// P/Q is expressed as partial fraction expansion:
//
// a0 + \sum_k ak/(V^dagV + bk)
//
// d[P/Q] is then
//
// \sum_k -ak [V^dagV+bk]^{-1} [ dV^dag V + V^dag dV ] [V^dag V + bk]^{-1}
//
// and similar for N/D.
//
// Need
// MpvPhi_k = [Vdag V + bk]^{-1} chi
// MpvPhi = {a0 + \sum_k ak [Vdag V + bk]^{-1} }chi
//
// MfMpvPhi_k = [MdagM+bk]^{-1} MpvPhi
// MfMpvPhi = {a0 + \sum_k ak [Mdag M + bk]^{-1} } MpvPhi
//
// MpvMfMpvPhi_k = [Vdag V + bk]^{-1} MfMpvchi
//
virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
const int n_f = PowerNegHalf.poles.size();
const int n_pv = PowerQuarter.poles.size();
std::vector<FermionField> MpvPhi_k (n_pv,NumOp.FermionRedBlackGrid());
std::vector<FermionField> MpvMfMpvPhi_k(n_pv,NumOp.FermionRedBlackGrid());
std::vector<FermionField> MfMpvPhi_k (n_f ,NumOp.FermionRedBlackGrid());
FermionField MpvPhi(NumOp.FermionRedBlackGrid());
FermionField MfMpvPhi(NumOp.FermionRedBlackGrid());
FermionField MpvMfMpvPhi(NumOp.FermionRedBlackGrid());
FermionField Y(NumOp.FermionRedBlackGrid());
GaugeField tmp(NumOp.GaugeGrid());
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
SchurDifferentiableOperator<Impl> VdagV(NumOp);
SchurDifferentiableOperator<Impl> MdagM(DenOp);
ConjugateGradientMultiShift<FermionField> msCG_V(param.MaxIter,PowerQuarter);
ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,PowerNegHalf);
msCG_V(VdagV,PhiOdd,MpvPhi_k,MpvPhi);
msCG_M(MdagM,MpvPhi,MfMpvPhi_k,MfMpvPhi);
msCG_V(VdagV,MfMpvPhi,MpvMfMpvPhi_k,MpvMfMpvPhi);
RealD ak;
dSdU = Zero();
// With these building blocks
//
// dS/dU =
// \sum_k -ak MfMpvPhi_k^dag [ dM^dag M + M^dag dM ] MfMpvPhi_k (1)
// + \sum_k -ak MpvMfMpvPhi_k^\dag [ dV^dag V + V^dag dV ] MpvPhi_k (2)
// -ak MpvPhi_k^dag [ dV^dag V + V^dag dV ] MpvMfMpvPhi_k (3)
//(1)
for(int k=0;k<n_f;k++){
ak = PowerNegHalf.residues[k];
MdagM.Mpc(MfMpvPhi_k[k],Y);
MdagM.MpcDagDeriv(tmp , MfMpvPhi_k[k], Y ); dSdU=dSdU+ak*tmp;
MdagM.MpcDeriv(tmp , Y, MfMpvPhi_k[k] ); dSdU=dSdU+ak*tmp;
}
//(2)
//(3)
for(int k=0;k<n_pv;k++){
ak = PowerQuarter.residues[k];
VdagV.Mpc(MpvPhi_k[k],Y);
VdagV.MpcDagDeriv(tmp,MpvMfMpvPhi_k[k],Y); dSdU=dSdU+ak*tmp;
VdagV.MpcDeriv (tmp,Y,MpvMfMpvPhi_k[k]); dSdU=dSdU+ak*tmp;
VdagV.Mpc(MpvMfMpvPhi_k[k],Y); // V as we take Ydag
VdagV.MpcDeriv (tmp,Y, MpvPhi_k[k]); dSdU=dSdU+ak*tmp;
VdagV.MpcDagDeriv(tmp,MpvPhi_k[k], Y); dSdU=dSdU+ak*tmp;
}
//dSdU = Ta(dSdU);
};
};
NAMESPACE_END(Grid);

View File

@ -49,12 +49,10 @@ NAMESPACE_BEGIN(Grid);
Params param;
MultiShiftFunction PowerHalf ;
MultiShiftFunction PowerQuarter;
MultiShiftFunction PowerNegHalf;
MultiShiftFunction PowerQuarter;
MultiShiftFunction PowerNegQuarter;
MultiShiftFunction MDPowerQuarter;
MultiShiftFunction MDPowerNegHalf;
private:
FermionOperator<Impl> & NumOp;// the basic operator
@ -75,13 +73,11 @@ NAMESPACE_BEGIN(Grid);
remez.generateApprox(param.degree,1,2);
PowerHalf.Init(remez,param.tolerance,false);
PowerNegHalf.Init(remez,param.tolerance,true);
MDPowerNegHalf.Init(remez,param.mdtolerance,true);
// MdagM^(+- 1/4)
std::cout<<GridLogMessage << "Generating degree "<<param.degree<<" for x^(1/4)"<<std::endl;
remez.generateApprox(param.degree,1,4);
PowerQuarter.Init(remez,param.tolerance,false);
MDPowerQuarter.Init(remez,param.mdtolerance,false);
PowerNegQuarter.Init(remez,param.tolerance,true);
};
@ -208,8 +204,8 @@ NAMESPACE_BEGIN(Grid);
virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
const int n_f = MDPowerNegHalf.poles.size();
const int n_pv = MDPowerQuarter.poles.size();
const int n_f = PowerNegHalf.poles.size();
const int n_pv = PowerQuarter.poles.size();
std::vector<FermionField> MpvPhi_k (n_pv,NumOp.FermionGrid());
std::vector<FermionField> MpvMfMpvPhi_k(n_pv,NumOp.FermionGrid());
@ -228,8 +224,8 @@ NAMESPACE_BEGIN(Grid);
MdagMLinearOperator<FermionOperator<Impl> ,FermionField> MdagM(DenOp);
MdagMLinearOperator<FermionOperator<Impl> ,FermionField> VdagV(NumOp);
ConjugateGradientMultiShift<FermionField> msCG_V(param.MaxIter,MDPowerQuarter);
ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,MDPowerNegHalf);
ConjugateGradientMultiShift<FermionField> msCG_V(param.MaxIter,PowerQuarter);
ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,PowerNegHalf);
msCG_V(VdagV,Phi,MpvPhi_k,MpvPhi);
msCG_M(MdagM,MpvPhi,MfMpvPhi_k,MfMpvPhi);
@ -248,7 +244,7 @@ NAMESPACE_BEGIN(Grid);
//(1)
for(int k=0;k<n_f;k++){
ak = MDPowerNegHalf.residues[k];
ak = PowerNegHalf.residues[k];
DenOp.M(MfMpvPhi_k[k],Y);
DenOp.MDeriv(tmp , MfMpvPhi_k[k], Y,DaggerYes ); dSdU=dSdU+ak*tmp;
DenOp.MDeriv(tmp , Y, MfMpvPhi_k[k], DaggerNo ); dSdU=dSdU+ak*tmp;
@ -258,7 +254,7 @@ NAMESPACE_BEGIN(Grid);
//(3)
for(int k=0;k<n_pv;k++){
ak = MDPowerQuarter.residues[k];
ak = PowerQuarter.residues[k];
NumOp.M(MpvPhi_k[k],Y);
NumOp.MDeriv(tmp,MpvMfMpvPhi_k[k],Y,DaggerYes); dSdU=dSdU+ak*tmp;

View File

@ -40,8 +40,6 @@ directory
#include <Grid/qcd/action/pseudofermion/OneFlavourRational.h>
#include <Grid/qcd/action/pseudofermion/OneFlavourRationalRatio.h>
#include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRational.h>
#include <Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatio.h>
#include <Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatioMixedPrec.h>
#include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h>
#include <Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h>

View File

@ -75,22 +75,24 @@ NAMESPACE_BEGIN(Grid);
conformable(_NumOp.GaugeRedBlackGrid(), _DenOp.GaugeRedBlackGrid());
};
virtual std::string action_name(){
std::stringstream sstream;
sstream<<"TwoFlavourEvenOddRatioPseudoFermionAction det("<<DenOp.Mass()<<") / det("<<NumOp.Mass()<<")";
return sstream.str();
}
virtual std::string action_name(){return "TwoFlavourEvenOddRatioPseudoFermionAction";}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream<< GridLogMessage << "["<<action_name()<<"] -- No further parameters "<<std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] has no parameters" << std::endl;
return sstream.str();
}
const FermionField &getPhiOdd() const{ return PhiOdd; }
virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) {
// P(phi) = e^{- phi^dag Vpc (MpcdagMpc)^-1 Vpcdag phi}
//
// NumOp == V
// DenOp == M
//
// Take phi_o = Vpcdag^{-1} Mpcdag eta_o ; eta_o = Mpcdag^{-1} Vpcdag Phi
//
// P(eta_o) = e^{- eta_o^dag eta_o}
//
// e^{x^2/2 sig^2} => sig^2 = 0.5.
@ -98,22 +100,12 @@ NAMESPACE_BEGIN(Grid);
RealD scale = std::sqrt(0.5);
FermionField eta (NumOp.FermionGrid());
gaussian(pRNG,eta); eta = eta * scale;
refresh(U,eta);
}
void refresh(const GaugeField &U, const FermionField &eta) {
// P(phi) = e^{- phi^dag Vpc (MpcdagMpc)^-1 Vpcdag phi}
//
// NumOp == V
// DenOp == M
//
FermionField etaOdd (NumOp.FermionRedBlackGrid());
FermionField etaEven(NumOp.FermionRedBlackGrid());
FermionField tmp (NumOp.FermionRedBlackGrid());
gaussian(pRNG,eta);
pickCheckerboard(Even,etaEven,eta);
pickCheckerboard(Odd,etaOdd,eta);
@ -132,6 +124,10 @@ NAMESPACE_BEGIN(Grid);
// Even det factors
DenOp.MooeeDag(etaEven,tmp);
NumOp.MooeeInvDag(tmp,PhiEven);
PhiOdd =PhiOdd*scale;
PhiEven=PhiEven*scale;
};
//////////////////////////////////////////////////////

View File

@ -1,203 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/pseudofermion/TwoFlavourRatio.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
///////////////////////////////////////
// Two flavour ratio
///////////////////////////////////////
template<class Impl>
class TwoFlavourRatioEO4DPseudoFermionAction : public Action<typename Impl::GaugeField> {
public:
INHERIT_IMPL_TYPES(Impl);
private:
typedef FermionOperator<Impl> FermOp;
FermionOperator<Impl> & NumOp;// the basic operator
FermionOperator<Impl> & DenOp;// the basic operator
OperatorFunction<FermionField> &DerivativeSolver;
OperatorFunction<FermionField> &DerivativeDagSolver;
OperatorFunction<FermionField> &ActionSolver;
OperatorFunction<FermionField> &HeatbathSolver;
FermionField phi4; // the pseudo fermion field for this trajectory
public:
TwoFlavourRatioEO4DPseudoFermionAction(FermionOperator<Impl> &_NumOp,
FermionOperator<Impl> &_DenOp,
OperatorFunction<FermionField> & DS,
OperatorFunction<FermionField> & AS ) :
TwoFlavourRatioEO4DPseudoFermionAction(_NumOp,_DenOp, DS,DS,AS,AS) {};
TwoFlavourRatioEO4DPseudoFermionAction(FermionOperator<Impl> &_NumOp,
FermionOperator<Impl> &_DenOp,
OperatorFunction<FermionField> & DS,
OperatorFunction<FermionField> & DDS,
OperatorFunction<FermionField> & AS,
OperatorFunction<FermionField> & HS
) : NumOp(_NumOp),
DenOp(_DenOp),
DerivativeSolver(DS),
DerivativeDagSolver(DDS),
ActionSolver(AS),
HeatbathSolver(HS),
phi4(_NumOp.GaugeGrid())
{};
virtual std::string action_name(){return "TwoFlavourRatioEO4DPseudoFermionAction";}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "["<<action_name()<<"] has no parameters" << std::endl;
return sstream.str();
}
virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) {
// P(phi) = e^{- phi^dag (V^dag M^-dag)_11 (M^-1 V)_11 phi}
//
// NumOp == V
// DenOp == M
//
// Take phi = (V^{-1} M)_11 eta ; eta = (M^{-1} V)_11 Phi
//
// P(eta) = e^{- eta^dag eta}
//
// e^{x^2/2 sig^2} => sig^2 = 0.5.
//
// So eta should be of width sig = 1/sqrt(2) and must multiply by 0.707....
//
RealD scale = std::sqrt(0.5);
FermionField eta4(NumOp.GaugeGrid());
FermionField eta5(NumOp.FermionGrid());
FermionField tmp(NumOp.FermionGrid());
FermionField phi5(NumOp.FermionGrid());
gaussian(pRNG,eta4);
NumOp.ImportFourDimPseudoFermion(eta4,eta5);
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
SchurRedBlackDiagMooeeSolve<FermionField> PrecSolve(HeatbathSolver);
DenOp.M(eta5,tmp); // M eta
PrecSolve(NumOp,tmp,phi5); // phi = V^-1 M eta
phi5=phi5*scale;
std::cout << GridLogMessage << "4d pf refresh "<< norm2(phi5)<<"\n";
// Project to 4d
NumOp.ExportFourDimPseudoFermion(phi5,phi4);
};
//////////////////////////////////////////////////////
// S = phi^dag (V^dag M^-dag)_11 (M^-1 V)_11 phi
//////////////////////////////////////////////////////
virtual RealD S(const GaugeField &U) {
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
FermionField Y4(NumOp.GaugeGrid());
FermionField X(NumOp.FermionGrid());
FermionField Y(NumOp.FermionGrid());
FermionField phi5(NumOp.FermionGrid());
MdagMLinearOperator<FermionOperator<Impl> ,FermionField> MdagMOp(DenOp);
SchurRedBlackDiagMooeeSolve<FermionField> PrecSolve(ActionSolver);
NumOp.ImportFourDimPseudoFermion(phi4,phi5);
NumOp.M(phi5,X); // X= V phi
PrecSolve(DenOp,X,Y); // Y= (MdagM)^-1 Mdag Vdag phi = M^-1 V phi
NumOp.ExportFourDimPseudoFermion(Y,Y4);
RealD action = norm2(Y4);
return action;
};
//////////////////////////////////////////////////////
// dS/du = 2 Re phi^dag (V^dag M^-dag)_11 (M^-1 d V)_11 phi
// - 2 Re phi^dag (dV^dag M^-dag)_11 (M^-1 dM M^-1 V)_11 phi
//////////////////////////////////////////////////////
virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
FermionField X(NumOp.FermionGrid());
FermionField Y(NumOp.FermionGrid());
FermionField phi(NumOp.FermionGrid());
FermionField Vphi(NumOp.FermionGrid());
FermionField MinvVphi(NumOp.FermionGrid());
FermionField tmp4(NumOp.GaugeGrid());
FermionField MdagInvMinvVphi(NumOp.FermionGrid());
GaugeField force(NumOp.GaugeGrid());
//Y=V phi
//X = (Mdag V phi
//Y = (Mdag M)^-1 Mdag V phi = M^-1 V Phi
NumOp.ImportFourDimPseudoFermion(phi4,phi);
NumOp.M(phi,Vphi); // V phi
SchurRedBlackDiagMooeeSolve<FermionField> PrecSolve(DerivativeSolver);
PrecSolve(DenOp,Vphi,MinvVphi);// M^-1 V phi
std::cout << GridLogMessage << "4d deriv solve "<< norm2(MinvVphi)<<"\n";
// Projects onto the physical space and back
NumOp.ExportFourDimPseudoFermion(MinvVphi,tmp4);
NumOp.ImportFourDimPseudoFermion(tmp4,Y);
SchurRedBlackDiagMooeeDagSolve<FermionField> PrecDagSolve(DerivativeDagSolver);
// X = proj M^-dag V phi
// Need an adjoint solve
PrecDagSolve(DenOp,Y,MdagInvMinvVphi);
std::cout << GridLogMessage << "4d deriv solve dag "<< norm2(MdagInvMinvVphi)<<"\n";
// phi^dag (Vdag Mdag^-1) (M^-1 dV) phi
NumOp.MDeriv(force ,MdagInvMinvVphi , phi, DaggerNo ); dSdU=force;
// phi^dag (dVdag Mdag^-1) (M^-1 V) phi
NumOp.MDeriv(force , phi, MdagInvMinvVphi ,DaggerYes ); dSdU=dSdU+force;
// - 2 Re phi^dag (dV^dag M^-dag)_11 (M^-1 dM M^-1 V)_11 phi
DenOp.MDeriv(force,MdagInvMinvVphi,MinvVphi,DaggerNo); dSdU=dSdU-force;
DenOp.MDeriv(force,MinvVphi,MdagInvMinvVphi,DaggerYes); dSdU=dSdU-force;
dSdU *= -1.0;
//dSdU = - Ta(dSdU);
};
};
NAMESPACE_END(Grid);

View File

@ -1,6 +0,0 @@
#ifndef GRID_GPARITY_H_
#define GRID_GPARITY_H_
#include<Grid/qcd/gparity/GparityFlavour.h>
#endif

View File

@ -1,34 +0,0 @@
#include <Grid/Grid.h>
NAMESPACE_BEGIN(Grid);
const std::array<const GparityFlavour, 3> GparityFlavour::sigma_mu = {{
GparityFlavour(GparityFlavour::Algebra::SigmaX),
GparityFlavour(GparityFlavour::Algebra::SigmaY),
GparityFlavour(GparityFlavour::Algebra::SigmaZ)
}};
const std::array<const GparityFlavour, 6> GparityFlavour::sigma_all = {{
GparityFlavour(GparityFlavour::Algebra::Identity),
GparityFlavour(GparityFlavour::Algebra::SigmaX),
GparityFlavour(GparityFlavour::Algebra::SigmaY),
GparityFlavour(GparityFlavour::Algebra::SigmaZ),
GparityFlavour(GparityFlavour::Algebra::ProjPlus),
GparityFlavour(GparityFlavour::Algebra::ProjMinus)
}};
const std::array<const char *, GparityFlavour::nSigma> GparityFlavour::name = {{
"SigmaX",
"MinusSigmaX",
"SigmaY",
"MinusSigmaY",
"SigmaZ",
"MinusSigmaZ",
"Identity",
"MinusIdentity",
"ProjPlus",
"MinusProjPlus",
"ProjMinus",
"MinusProjMinus"}};
NAMESPACE_END(Grid);

View File

@ -1,475 +0,0 @@
#ifndef GRID_QCD_GPARITY_FLAVOUR_H
#define GRID_QCD_GPARITY_FLAVOUR_H
//Support for flavour-matrix operations acting on the G-parity flavour index
#include <array>
NAMESPACE_BEGIN(Grid);
class GparityFlavour {
public:
GRID_SERIALIZABLE_ENUM(Algebra, undef,
SigmaX, 0,
MinusSigmaX, 1,
SigmaY, 2,
MinusSigmaY, 3,
SigmaZ, 4,
MinusSigmaZ, 5,
Identity, 6,
MinusIdentity, 7,
ProjPlus, 8,
MinusProjPlus, 9,
ProjMinus, 10,
MinusProjMinus, 11
);
static constexpr unsigned int nSigma = 12;
static const std::array<const char *, nSigma> name;
static const std::array<const GparityFlavour, 3> sigma_mu;
static const std::array<const GparityFlavour, 6> sigma_all;
Algebra g;
public:
accelerator GparityFlavour(Algebra initg): g(initg) {}
};
// 0 1 x vector
// 1 0
template<class vtype>
accelerator_inline void multFlavourSigmaX(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = rhs(1);
ret(1) = rhs(0);
};
template<class vtype>
accelerator_inline void lmultFlavourSigmaX(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = rhs(1,0);
ret(0,1) = rhs(1,1);
ret(1,0) = rhs(0,0);
ret(1,1) = rhs(0,1);
};
template<class vtype>
accelerator_inline void rmultFlavourSigmaX(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = rhs(0,1);
ret(0,1) = rhs(0,0);
ret(1,0) = rhs(1,1);
ret(1,1) = rhs(1,0);
};
template<class vtype>
accelerator_inline void multFlavourMinusSigmaX(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = -rhs(1);
ret(1) = -rhs(0);
};
template<class vtype>
accelerator_inline void lmultFlavourMinusSigmaX(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -rhs(1,0);
ret(0,1) = -rhs(1,1);
ret(1,0) = -rhs(0,0);
ret(1,1) = -rhs(0,1);
};
template<class vtype>
accelerator_inline void rmultFlavourMinusSigmaX(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -rhs(0,1);
ret(0,1) = -rhs(0,0);
ret(1,0) = -rhs(1,1);
ret(1,1) = -rhs(1,0);
};
// 0 -i x vector
// i 0
template<class vtype>
accelerator_inline void multFlavourSigmaY(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = timesMinusI(rhs(1));
ret(1) = timesI(rhs(0));
};
template<class vtype>
accelerator_inline void lmultFlavourSigmaY(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = timesMinusI(rhs(1,0));
ret(0,1) = timesMinusI(rhs(1,1));
ret(1,0) = timesI(rhs(0,0));
ret(1,1) = timesI(rhs(0,1));
};
template<class vtype>
accelerator_inline void rmultFlavourSigmaY(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = timesI(rhs(0,1));
ret(0,1) = timesMinusI(rhs(0,0));
ret(1,0) = timesI(rhs(1,1));
ret(1,1) = timesMinusI(rhs(1,0));
};
template<class vtype>
accelerator_inline void multFlavourMinusSigmaY(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = timesI(rhs(1));
ret(1) = timesMinusI(rhs(0));
};
template<class vtype>
accelerator_inline void lmultFlavourMinusSigmaY(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = timesI(rhs(1,0));
ret(0,1) = timesI(rhs(1,1));
ret(1,0) = timesMinusI(rhs(0,0));
ret(1,1) = timesMinusI(rhs(0,1));
};
template<class vtype>
accelerator_inline void rmultFlavourMinusSigmaY(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = timesMinusI(rhs(0,1));
ret(0,1) = timesI(rhs(0,0));
ret(1,0) = timesMinusI(rhs(1,1));
ret(1,1) = timesI(rhs(1,0));
};
// 1 0 x vector
// 0 -1
template<class vtype>
accelerator_inline void multFlavourSigmaZ(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = rhs(0);
ret(1) = -rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourSigmaZ(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = rhs(0,0);
ret(0,1) = rhs(0,1);
ret(1,0) = -rhs(1,0);
ret(1,1) = -rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourSigmaZ(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = rhs(0,0);
ret(0,1) = -rhs(0,1);
ret(1,0) = rhs(1,0);
ret(1,1) = -rhs(1,1);
};
template<class vtype>
accelerator_inline void multFlavourMinusSigmaZ(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = -rhs(0);
ret(1) = rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourMinusSigmaZ(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -rhs(0,0);
ret(0,1) = -rhs(0,1);
ret(1,0) = rhs(1,0);
ret(1,1) = rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourMinusSigmaZ(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -rhs(0,0);
ret(0,1) = rhs(0,1);
ret(1,0) = -rhs(1,0);
ret(1,1) = rhs(1,1);
};
template<class vtype>
accelerator_inline void multFlavourIdentity(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = rhs(0);
ret(1) = rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourIdentity(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = rhs(0,0);
ret(0,1) = rhs(0,1);
ret(1,0) = rhs(1,0);
ret(1,1) = rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourIdentity(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = rhs(0,0);
ret(0,1) = rhs(0,1);
ret(1,0) = rhs(1,0);
ret(1,1) = rhs(1,1);
};
template<class vtype>
accelerator_inline void multFlavourMinusIdentity(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = -rhs(0);
ret(1) = -rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourMinusIdentity(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -rhs(0,0);
ret(0,1) = -rhs(0,1);
ret(1,0) = -rhs(1,0);
ret(1,1) = -rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourMinusIdentity(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -rhs(0,0);
ret(0,1) = -rhs(0,1);
ret(1,0) = -rhs(1,0);
ret(1,1) = -rhs(1,1);
};
//G-parity flavour projection 1/2(1+\sigma_2)
//1 -i
//i 1
template<class vtype>
accelerator_inline void multFlavourProjPlus(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = 0.5*rhs(0) + 0.5*timesMinusI(rhs(1));
ret(1) = 0.5*timesI(rhs(0)) + 0.5*rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourProjPlus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = 0.5*rhs(0,0) + 0.5*timesMinusI(rhs(1,0));
ret(0,1) = 0.5*rhs(0,1) + 0.5*timesMinusI(rhs(1,1));
ret(1,0) = 0.5*timesI(rhs(0,0)) + 0.5*rhs(1,0);
ret(1,1) = 0.5*timesI(rhs(0,1)) + 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourProjPlus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = 0.5*rhs(0,0) + 0.5*timesI(rhs(0,1));
ret(0,1) = 0.5*timesMinusI(rhs(0,0)) + 0.5*rhs(0,1);
ret(1,0) = 0.5*rhs(1,0) + 0.5*timesI(rhs(1,1));
ret(1,1) = 0.5*timesMinusI(rhs(1,0)) + 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline void multFlavourMinusProjPlus(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = -0.5*rhs(0) + 0.5*timesI(rhs(1));
ret(1) = 0.5*timesMinusI(rhs(0)) - 0.5*rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourMinusProjPlus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -0.5*rhs(0,0) + 0.5*timesI(rhs(1,0));
ret(0,1) = -0.5*rhs(0,1) + 0.5*timesI(rhs(1,1));
ret(1,0) = 0.5*timesMinusI(rhs(0,0)) - 0.5*rhs(1,0);
ret(1,1) = 0.5*timesMinusI(rhs(0,1)) - 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourMinusProjPlus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -0.5*rhs(0,0) + 0.5*timesMinusI(rhs(0,1));
ret(0,1) = 0.5*timesI(rhs(0,0)) - 0.5*rhs(0,1);
ret(1,0) = -0.5*rhs(1,0) + 0.5*timesMinusI(rhs(1,1));
ret(1,1) = 0.5*timesI(rhs(1,0)) - 0.5*rhs(1,1);
};
//G-parity flavour projection 1/2(1-\sigma_2)
//1 i
//-i 1
template<class vtype>
accelerator_inline void multFlavourProjMinus(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = 0.5*rhs(0) + 0.5*timesI(rhs(1));
ret(1) = 0.5*timesMinusI(rhs(0)) + 0.5*rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourProjMinus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = 0.5*rhs(0,0) + 0.5*timesI(rhs(1,0));
ret(0,1) = 0.5*rhs(0,1) + 0.5*timesI(rhs(1,1));
ret(1,0) = 0.5*timesMinusI(rhs(0,0)) + 0.5*rhs(1,0);
ret(1,1) = 0.5*timesMinusI(rhs(0,1)) + 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourProjMinus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = 0.5*rhs(0,0) + 0.5*timesMinusI(rhs(0,1));
ret(0,1) = 0.5*timesI(rhs(0,0)) + 0.5*rhs(0,1);
ret(1,0) = 0.5*rhs(1,0) + 0.5*timesMinusI(rhs(1,1));
ret(1,1) = 0.5*timesI(rhs(1,0)) + 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline void multFlavourMinusProjMinus(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = -0.5*rhs(0) + 0.5*timesMinusI(rhs(1));
ret(1) = 0.5*timesI(rhs(0)) - 0.5*rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourMinusProjMinus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -0.5*rhs(0,0) + 0.5*timesMinusI(rhs(1,0));
ret(0,1) = -0.5*rhs(0,1) + 0.5*timesMinusI(rhs(1,1));
ret(1,0) = 0.5*timesI(rhs(0,0)) - 0.5*rhs(1,0);
ret(1,1) = 0.5*timesI(rhs(0,1)) - 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourMinusProjMinus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -0.5*rhs(0,0) + 0.5*timesI(rhs(0,1));
ret(0,1) = 0.5*timesMinusI(rhs(0,0)) - 0.5*rhs(0,1);
ret(1,0) = -0.5*rhs(1,0) + 0.5*timesI(rhs(1,1));
ret(1,1) = 0.5*timesMinusI(rhs(1,0)) - 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline auto operator*(const GparityFlavour &G, const iVector<vtype, Ngp> &arg)
->typename std::enable_if<matchGridTensorIndex<iVector<vtype, Ngp>, GparityFlavourTensorIndex>::value, iVector<vtype, Ngp>>::type
{
iVector<vtype, Ngp> ret;
switch (G.g)
{
case GparityFlavour::Algebra::SigmaX:
multFlavourSigmaX(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaX:
multFlavourMinusSigmaX(ret, arg); break;
case GparityFlavour::Algebra::SigmaY:
multFlavourSigmaY(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaY:
multFlavourMinusSigmaY(ret, arg); break;
case GparityFlavour::Algebra::SigmaZ:
multFlavourSigmaZ(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaZ:
multFlavourMinusSigmaZ(ret, arg); break;
case GparityFlavour::Algebra::Identity:
multFlavourIdentity(ret, arg); break;
case GparityFlavour::Algebra::MinusIdentity:
multFlavourMinusIdentity(ret, arg); break;
case GparityFlavour::Algebra::ProjPlus:
multFlavourProjPlus(ret, arg); break;
case GparityFlavour::Algebra::MinusProjPlus:
multFlavourMinusProjPlus(ret, arg); break;
case GparityFlavour::Algebra::ProjMinus:
multFlavourProjMinus(ret, arg); break;
case GparityFlavour::Algebra::MinusProjMinus:
multFlavourMinusProjMinus(ret, arg); break;
default: assert(0);
}
return ret;
}
template<class vtype>
accelerator_inline auto operator*(const GparityFlavour &G, const iMatrix<vtype, Ngp> &arg)
->typename std::enable_if<matchGridTensorIndex<iMatrix<vtype, Ngp>, GparityFlavourTensorIndex>::value, iMatrix<vtype, Ngp>>::type
{
iMatrix<vtype, Ngp> ret;
switch (G.g)
{
case GparityFlavour::Algebra::SigmaX:
lmultFlavourSigmaX(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaX:
lmultFlavourMinusSigmaX(ret, arg); break;
case GparityFlavour::Algebra::SigmaY:
lmultFlavourSigmaY(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaY:
lmultFlavourMinusSigmaY(ret, arg); break;
case GparityFlavour::Algebra::SigmaZ:
lmultFlavourSigmaZ(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaZ:
lmultFlavourMinusSigmaZ(ret, arg); break;
case GparityFlavour::Algebra::Identity:
lmultFlavourIdentity(ret, arg); break;
case GparityFlavour::Algebra::MinusIdentity:
lmultFlavourMinusIdentity(ret, arg); break;
case GparityFlavour::Algebra::ProjPlus:
lmultFlavourProjPlus(ret, arg); break;
case GparityFlavour::Algebra::MinusProjPlus:
lmultFlavourMinusProjPlus(ret, arg); break;
case GparityFlavour::Algebra::ProjMinus:
lmultFlavourProjMinus(ret, arg); break;
case GparityFlavour::Algebra::MinusProjMinus:
lmultFlavourMinusProjMinus(ret, arg); break;
default: assert(0);
}
return ret;
}
template<class vtype>
accelerator_inline auto operator*(const iMatrix<vtype, Ngp> &arg, const GparityFlavour &G)
->typename std::enable_if<matchGridTensorIndex<iMatrix<vtype, Ngp>, GparityFlavourTensorIndex>::value, iMatrix<vtype, Ngp>>::type
{
iMatrix<vtype, Ngp> ret;
switch (G.g)
{
case GparityFlavour::Algebra::SigmaX:
rmultFlavourSigmaX(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaX:
rmultFlavourMinusSigmaX(ret, arg); break;
case GparityFlavour::Algebra::SigmaY:
rmultFlavourSigmaY(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaY:
rmultFlavourMinusSigmaY(ret, arg); break;
case GparityFlavour::Algebra::SigmaZ:
rmultFlavourSigmaZ(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaZ:
rmultFlavourMinusSigmaZ(ret, arg); break;
case GparityFlavour::Algebra::Identity:
rmultFlavourIdentity(ret, arg); break;
case GparityFlavour::Algebra::MinusIdentity:
rmultFlavourMinusIdentity(ret, arg); break;
case GparityFlavour::Algebra::ProjPlus:
rmultFlavourProjPlus(ret, arg); break;
case GparityFlavour::Algebra::MinusProjPlus:
rmultFlavourMinusProjPlus(ret, arg); break;
case GparityFlavour::Algebra::ProjMinus:
rmultFlavourProjMinus(ret, arg); break;
case GparityFlavour::Algebra::MinusProjMinus:
rmultFlavourMinusProjMinus(ret, arg); break;
default: assert(0);
}
return ret;
}
NAMESPACE_END(Grid);
#endif // include guard

View File

@ -129,10 +129,18 @@ public:
Runner(S);
}
//Use the checkpointer to initialize the RNGs and the gauge field, writing the resulting gauge field into U.
//This is called automatically by Run but may be useful elsewhere, e.g. for integrator tuning experiments
void initializeGaugeFieldAndRNGs(Field &U){
if(!Resources.haveRNGs()) Resources.AddRNGs();
//////////////////////////////////////////////////////////////////
private:
template <class SmearingPolicy>
void Runner(SmearingPolicy &Smearing) {
auto UGrid = Resources.GetCartesian();
Resources.AddRNGs();
Field U(UGrid);
// Can move this outside?
typedef IntegratorType<SmearingPolicy> TheIntegrator;
TheIntegrator MDynamics(UGrid, Parameters.MD, TheAction, Smearing);
if (Parameters.StartingType == "HotStart") {
// Hot start
@ -151,43 +159,14 @@ public:
Resources.GetCheckPointer()->CheckpointRestore(Parameters.StartTrajectory, U,
Resources.GetSerialRNG(),
Resources.GetParallelRNG());
} else if (Parameters.StartingType == "CheckpointStartReseed") {
// Same as CheckpointRestart but reseed the RNGs using the fixed integer seeding used for ColdStart and HotStart
// Useful for creating new evolution streams from an existing stream
// WARNING: Unfortunately because the checkpointer doesn't presently allow us to separately restore the RNG and gauge fields we have to load
// an existing RNG checkpoint first; make sure one is available and named correctly
Resources.GetCheckPointer()->CheckpointRestore(Parameters.StartTrajectory, U,
Resources.GetSerialRNG(),
Resources.GetParallelRNG());
Resources.SeedFixedIntegers();
} else {
// others
std::cout << GridLogError << "Unrecognized StartingType\n";
std::cout
<< GridLogError
<< "Valid [HotStart, ColdStart, TepidStart, CheckpointStart, CheckpointStartReseed]\n";
<< "Valid [HotStart, ColdStart, TepidStart, CheckpointStart]\n";
exit(1);
}
}
//////////////////////////////////////////////////////////////////
private:
template <class SmearingPolicy>
void Runner(SmearingPolicy &Smearing) {
auto UGrid = Resources.GetCartesian();
Field U(UGrid);
initializeGaugeFieldAndRNGs(U);
typedef IntegratorType<SmearingPolicy> TheIntegrator;
TheIntegrator MDynamics(UGrid, Parameters.MD, TheAction, Smearing);
// Sets the momentum filter
MDynamics.setMomentumFilter(*(Resources.GetMomentumFilter()));
Smearing.set_Field(U);

View File

@ -34,7 +34,6 @@ directory
* @brief Classes for Hybrid Monte Carlo update
*
* @author Guido Cossu
* @author Peter Boyle
*/
//--------------------------------------------------------------------
#pragma once
@ -116,17 +115,22 @@ private:
random(sRNG, rn_test);
std::cout << GridLogHMC << "--------------------------------------------------\n";
std::cout << GridLogHMC << "exp(-dH) = " << prob << " Random = " << rn_test << "\n";
std::cout << GridLogHMC << "Acc. Probability = " << ((prob < 1.0) ? prob : 1.0) << "\n";
std::cout << GridLogMessage
<< "--------------------------------------------------\n";
std::cout << GridLogMessage << "exp(-dH) = " << prob
<< " Random = " << rn_test << "\n";
std::cout << GridLogMessage
<< "Acc. Probability = " << ((prob < 1.0) ? prob : 1.0) << "\n";
if ((prob > 1.0) || (rn_test <= prob)) { // accepted
std::cout << GridLogHMC << "Metropolis_test -- ACCEPTED\n";
std::cout << GridLogHMC << "--------------------------------------------------\n";
std::cout << GridLogMessage << "Metropolis_test -- ACCEPTED\n";
std::cout << GridLogMessage
<< "--------------------------------------------------\n";
return true;
} else { // rejected
std::cout << GridLogHMC << "Metropolis_test -- REJECTED\n";
std::cout << GridLogHMC << "--------------------------------------------------\n";
std::cout << GridLogMessage << "Metropolis_test -- REJECTED\n";
std::cout << GridLogMessage
<< "--------------------------------------------------\n";
return false;
}
}
@ -135,68 +139,19 @@ private:
// Evolution
/////////////////////////////////////////////////////////
RealD evolve_hmc_step(Field &U) {
TheIntegrator.refresh(U, sRNG, pRNG); // set U and initialize P and phi's
GridBase *Grid = U.Grid();
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Mainly for DDHMC perform a random translation of U modulo volume
//////////////////////////////////////////////////////////////////////////////////////////////////////
std::cout << GridLogMessage << "--------------------------------------------------\n";
std::cout << GridLogMessage << "Random shifting gauge field by [";
for(int d=0;d<Grid->Nd();d++) {
int L = Grid->GlobalDimensions()[d];
RealD rn_uniform; random(sRNG, rn_uniform);
int shift = (int) (rn_uniform*L);
std::cout << shift;
if(d<Grid->Nd()-1) std::cout <<",";
else std::cout <<"]\n";
U = Cshift(U,d,shift);
}
std::cout << GridLogMessage << "--------------------------------------------------\n";
TheIntegrator.reset_timer();
//////////////////////////////////////////////////////////////////////////////////////////////////////
// set U and initialize P and phi's
//////////////////////////////////////////////////////////////////////////////////////////////////////
std::cout << GridLogMessage << "--------------------------------------------------\n";
std::cout << GridLogMessage << "Refresh momenta and pseudofermions";
TheIntegrator.refresh(U, sRNG, pRNG);
std::cout << GridLogMessage << "--------------------------------------------------\n";
//////////////////////////////////////////////////////////////////////////////////////////////////////
// initial state action
//////////////////////////////////////////////////////////////////////////////////////////////////////
std::cout << GridLogMessage << "--------------------------------------------------\n";
std::cout << GridLogMessage << "Compute initial action";
RealD H0 = TheIntegrator.S(U);
std::cout << GridLogMessage << "--------------------------------------------------\n";
RealD H0 = TheIntegrator.S(U); // initial state action
std::streamsize current_precision = std::cout.precision();
std::cout.precision(15);
std::cout << GridLogHMC << "Total H before trajectory = " << H0 << "\n";
std::cout << GridLogMessage << "Total H before trajectory = " << H0 << "\n";
std::cout.precision(current_precision);
std::cout << GridLogMessage << "--------------------------------------------------\n";
std::cout << GridLogMessage << " Molecular Dynamics evolution ";
TheIntegrator.integrate(U);
std::cout << GridLogMessage << "--------------------------------------------------\n";
//////////////////////////////////////////////////////////////////////////////////////////////////////
// updated state action
//////////////////////////////////////////////////////////////////////////////////////////////////////
std::cout << GridLogMessage << "--------------------------------------------------\n";
std::cout << GridLogMessage << "Compute final action";
RealD H1 = TheIntegrator.S(U);
std::cout << GridLogMessage << "--------------------------------------------------\n";
RealD H1 = TheIntegrator.S(U); // updated state action
///////////////////////////////////////////////////////////
if(0){
std::cout << "------------------------- Reversibility test" << std::endl;
@ -208,16 +163,17 @@ private:
}
///////////////////////////////////////////////////////////
std::cout.precision(15);
std::cout << GridLogHMC << "--------------------------------------------------\n";
std::cout << GridLogHMC << "Total H after trajectory = " << H1 << " dH = " << H1 - H0 << "\n";
std::cout << GridLogHMC << "--------------------------------------------------\n";
std::cout << GridLogMessage << "Total H after trajectory = " << H1
<< " dH = " << H1 - H0 << "\n";
std::cout.precision(current_precision);
return (H1 - H0);
}
public:
/////////////////////////////////////////
@ -239,13 +195,10 @@ public:
// Actual updates (evolve a copy Ucopy then copy back eventually)
unsigned int FinalTrajectory = Params.Trajectories + Params.NoMetropolisUntil + Params.StartTrajectory;
for (int traj = Params.StartTrajectory; traj < FinalTrajectory; ++traj) {
std::cout << GridLogHMC << "-- # Trajectory = " << traj << "\n";
std::cout << GridLogMessage << "-- # Trajectory = " << traj << "\n";
if (traj < Params.StartTrajectory + Params.NoMetropolisUntil) {
std::cout << GridLogHMC << "-- Thermalization" << std::endl;
std::cout << GridLogMessage << "-- Thermalization" << std::endl;
}
double t0=usecond();
@ -254,19 +207,20 @@ public:
DeltaH = evolve_hmc_step(Ucopy);
// Metropolis-Hastings test
bool accept = true;
if (Params.MetropolisTest && traj >= Params.StartTrajectory + Params.NoMetropolisUntil) {
if (traj >= Params.StartTrajectory + Params.NoMetropolisUntil) {
accept = metropolis_test(DeltaH);
} else {
std::cout << GridLogHMC << "Skipping Metropolis test" << std::endl;
std::cout << GridLogMessage << "Skipping Metropolis test" << std::endl;
}
if (accept)
Ucur = Ucopy;
double t1=usecond();
std::cout << GridLogHMC << "Total time for trajectory (s): " << (t1-t0)/1e6 << std::endl;
std::cout << GridLogMessage << "Total time for trajectory (s): " << (t1-t0)/1e6 << std::endl;
TheIntegrator.print_timer();
for (int obs = 0; obs < Observables.size(); obs++) {
std::cout << GridLogDebug << "Observables # " << obs << std::endl;
@ -274,7 +228,7 @@ public:
std::cout << GridLogDebug << "Observables pointer " << Observables[obs] << std::endl;
Observables[obs]->TrajectoryComplete(traj + 1, Ucur, sRNG, pRNG);
}
std::cout << GridLogHMC << ":::::::::::::::::::::::::::::::::::::::::::" << std::endl;
std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::::" << std::endl;
}
}

View File

@ -80,9 +80,7 @@ public:
std::cout << GridLogError << "Seeds not initialized" << std::endl;
exit(1);
}
std::cout << GridLogMessage << "Reseeding serial RNG with seed vector " << SerialSeeds << std::endl;
sRNG_.SeedFixedIntegers(SerialSeeds);
std::cout << GridLogMessage << "Reseeding parallel RNG with seed vector " << ParallelSeeds << std::endl;
pRNG_->SeedFixedIntegers(ParallelSeeds);
}
};

View File

@ -72,8 +72,6 @@ class HMCResourceManager {
typedef HMCModuleBase< BaseHmcCheckpointer<ImplementationPolicy> > CheckpointerBaseModule;
typedef HMCModuleBase< HmcObservable<typename ImplementationPolicy::Field> > ObservableBaseModule;
typedef ActionModuleBase< Action<typename ImplementationPolicy::Field>, GridModule > ActionBaseModule;
typedef typename ImplementationPolicy::Field MomentaField;
typedef typename ImplementationPolicy::Field Field;
// Named storage for grid pairs (std + red-black)
std::unordered_map<std::string, GridModule> Grids;
@ -82,9 +80,6 @@ class HMCResourceManager {
// SmearingModule<ImplementationPolicy> Smearing;
std::unique_ptr<CheckpointerBaseModule> CP;
// Momentum filter
std::unique_ptr<MomentumFilterBase<typename ImplementationPolicy::Field> > Filter;
// A vector of HmcObservable modules
std::vector<std::unique_ptr<ObservableBaseModule> > ObservablesList;
@ -95,7 +90,6 @@ class HMCResourceManager {
bool have_RNG;
bool have_CheckPointer;
bool have_Filter;
// NOTE: operator << is not overloaded for std::vector<string>
// so this function is necessary
@ -107,7 +101,7 @@ class HMCResourceManager {
public:
HMCResourceManager() : have_RNG(false), have_CheckPointer(false), have_Filter(false) {}
HMCResourceManager() : have_RNG(false), have_CheckPointer(false) {}
template <class ReaderClass, class vector_type = vComplex >
void initialize(ReaderClass &Read){
@ -135,7 +129,6 @@ public:
RNGModuleParameters RNGpar(Read);
SetRNGSeeds(RNGpar);
// Observables
auto &ObsFactory = HMC_ObservablesModuleFactory<observable_string, typename ImplementationPolicy::Field, ReaderClass>::getInstance();
Read.push(observable_string);// here must check if existing...
@ -215,16 +208,6 @@ public:
AddGrid(s, Mod);
}
void SetMomentumFilter( MomentumFilterBase<typename ImplementationPolicy::Field> * MomFilter) {
assert(have_Filter==false);
Filter = std::unique_ptr<MomentumFilterBase<typename ImplementationPolicy::Field> >(MomFilter);
have_Filter = true;
}
MomentumFilterBase<typename ImplementationPolicy::Field> *GetMomentumFilter(void) {
if ( !have_Filter)
SetMomentumFilter(new MomentumFilterNone<typename ImplementationPolicy::Field>());
return Filter.get();
}
GridCartesian* GetCartesian(std::string s = "") {
if (s.empty()) s = Grids.begin()->first;
@ -243,9 +226,6 @@ public:
//////////////////////////////////////////////////////
// Random number generators
//////////////////////////////////////////////////////
//Return true if the RNG objects have been instantiated
bool haveRNGs() const{ return have_RNG; }
void AddRNGs(std::string s = "") {
// Couple the RNGs to the GridModule tagged by s

View File

@ -33,6 +33,7 @@ directory
#define INTEGRATOR_INCLUDED
#include <memory>
#include "MomentumFilter.h"
NAMESPACE_BEGIN(Grid);
@ -66,7 +67,6 @@ public:
template <class FieldImplementation, class SmearingPolicy, class RepresentationPolicy>
class Integrator {
protected:
typedef typename FieldImplementation::Field MomentaField; //for readability
typedef typename FieldImplementation::Field Field;
@ -119,58 +119,36 @@ protected:
}
} update_P_hireps{};
void update_P(MomentaField& Mom, Field& U, int level, double ep) {
// input U actually not used in the fundamental case
// Fundamental updates, include smearing
for (int a = 0; a < as[level].actions.size(); ++a) {
double start_full = usecond();
Field force(U.Grid());
conformable(U.Grid(), Mom.Grid());
Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared);
double start_force = usecond();
as[level].actions.at(a)->deriv_timer_start();
as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta
as[level].actions.at(a)->deriv_timer_stop();
std::cout << GridLogIntegrator << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared << std::endl;
auto name = as[level].actions.at(a)->action_name();
if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force);
force = FieldImplementation::projectForce(force); // Ta for gauge fields
double end_force = usecond();
MomFilter->applyFilter(force);
std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<<a <<"] "<<name<< std::endl;
DumpSliceNorm("force ",force,Nd-1);
Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x])
Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;
Real force_max = std::sqrt(maxLocalNorm2(force));
Real impulse_max = force_max * ep * HMC_MOMENTUM_DENOMINATOR;
as[level].actions.at(a)->deriv_log(force_abs,force_max);
std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Force average: " << force_abs <<" "<<name<<std::endl;
std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Force max : " << force_max <<" "<<name<<std::endl;
std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Fdt average : " << impulse_abs <<" "<<name<<std::endl;
std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Fdt max : " << impulse_max <<" "<<name<<std::endl;
Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites());
std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] Force average: " << force_abs << std::endl;
Mom -= force * ep* HMC_MOMENTUM_DENOMINATOR;;
double end_full = usecond();
double time_full = (end_full - start_full) / 1e3;
double time_force = (end_force - start_force) / 1e3;
std::cout << GridLogMessage << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)" << std::endl;
}
// Force from the other representations
as[level].apply(update_P_hireps, Representations, Mom, U, ep);
MomFilter->applyFilter(Mom);
}
void update_U(Field& U, double ep)
@ -184,12 +162,8 @@ protected:
void update_U(MomentaField& Mom, Field& U, double ep)
{
MomentaField MomFiltered(Mom.Grid());
MomFiltered = Mom;
MomFilter->applyFilter(MomFiltered);
// exponential of Mom*U in the gauge fields case
FieldImplementation::update_field(MomFiltered, U, ep);
FieldImplementation::update_field(Mom, U, ep);
// Update the smeared fields, can be implemented as observer
Smearer.set_Field(U);
@ -232,66 +206,6 @@ public:
const MomentaField & getMomentum() const{ return P; }
void reset_timer(void)
{
for (int level = 0; level < as.size(); ++level) {
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
as[level].actions.at(actionID)->reset_timer();
}
}
}
void print_timer(void)
{
std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::" << std::endl;
std::cout << GridLogMessage << " Refresh cumulative timings "<<std::endl;
std::cout << GridLogMessage << "--------------------------- "<<std::endl;
for (int level = 0; level < as.size(); ++level) {
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
std::cout << GridLogMessage
<< as[level].actions.at(actionID)->action_name()
<<"["<<level<<"]["<< actionID<<"] "
<< as[level].actions.at(actionID)->refresh_us*1.0e-6<<" s"<< std::endl;
}
}
std::cout << GridLogMessage << "--------------------------- "<<std::endl;
std::cout << GridLogMessage << " Action cumulative timings "<<std::endl;
std::cout << GridLogMessage << "--------------------------- "<<std::endl;
for (int level = 0; level < as.size(); ++level) {
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
std::cout << GridLogMessage
<< as[level].actions.at(actionID)->action_name()
<<"["<<level<<"]["<< actionID<<"] "
<< as[level].actions.at(actionID)->S_us*1.0e-6<<" s"<< std::endl;
}
}
std::cout << GridLogMessage << "--------------------------- "<<std::endl;
std::cout << GridLogMessage << " Force cumulative timings "<<std::endl;
std::cout << GridLogMessage << "------------------------- "<<std::endl;
for (int level = 0; level < as.size(); ++level) {
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
std::cout << GridLogMessage
<< as[level].actions.at(actionID)->action_name()
<<"["<<level<<"]["<< actionID<<"] "
<< as[level].actions.at(actionID)->deriv_us*1.0e-6<<" s"<< std::endl;
}
}
std::cout << GridLogMessage << "--------------------------- "<<std::endl;
std::cout << GridLogMessage << " Force average size "<<std::endl;
std::cout << GridLogMessage << "------------------------- "<<std::endl;
for (int level = 0; level < as.size(); ++level) {
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
std::cout << GridLogMessage
<< as[level].actions.at(actionID)->action_name()
<<"["<<level<<"]["<< actionID<<"] : "
<<" force max " << as[level].actions.at(actionID)->deriv_max_average()
<<" norm " << as[level].actions.at(actionID)->deriv_norm_average()
<<" calls " << as[level].actions.at(actionID)->deriv_num
<< std::endl;
}
}
std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::"<< std::endl;
}
void print_parameters()
{
std::cout << GridLogMessage << "[Integrator] Name : "<< integrator_name() << std::endl;
@ -310,6 +224,7 @@ public:
}
}
std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::"<< std::endl;
}
void reverse_momenta()
@ -334,19 +249,15 @@ public:
void refresh(Field& U, GridSerialRNG & sRNG, GridParallelRNG& pRNG)
{
assert(P.Grid() == U.Grid());
std::cout << GridLogIntegrator << "Integrator refresh" << std::endl;
std::cout << GridLogIntegrator << "Integrator refresh\n";
std::cout << GridLogIntegrator << "Generating momentum" << std::endl;
FieldImplementation::generate_momenta(P, sRNG, pRNG);
// Update the smeared fields, can be implemented as observer
// necessary to keep the fields updated even after a reject
// of the Metropolis
std::cout << GridLogIntegrator << "Updating smeared fields" << std::endl;
Smearer.set_Field(U);
// Set the (eventual) representations gauge fields
std::cout << GridLogIntegrator << "Updating representations" << std::endl;
Representations.update(U);
// The Smearer is attached to a pointer of the gauge field
@ -356,19 +267,15 @@ public:
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
// get gauge field from the SmearingPolicy and
// based on the boolean is_smeared in actionID
auto name = as[level].actions.at(actionID)->action_name();
std::cout << GridLogMessage << "refresh [" << level << "][" << actionID << "] "<<name << std::endl;
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
as[level].actions.at(actionID)->refresh_timer_start();
as[level].actions.at(actionID)->refresh(Us, sRNG, pRNG);
as[level].actions.at(actionID)->refresh_timer_stop();
}
// Refresh the higher representation actions
as[level].apply(refresh_hireps, Representations, sRNG, pRNG);
}
MomFilter->applyFilter(P);
}
// to be used by the actionlevel class to iterate
@ -403,9 +310,7 @@ public:
// based on the boolean is_smeared in actionID
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
as[level].actions.at(actionID)->S_timer_start();
Hterm = as[level].actions.at(actionID)->S(Us);
as[level].actions.at(actionID)->S_timer_stop();
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
H += Hterm;
}

View File

@ -37,7 +37,7 @@ NAMESPACE_BEGIN(Grid);
template<typename MomentaField>
struct MomentumFilterBase{
virtual void applyFilter(MomentaField &P) const = 0;
virtual void applyFilter(MomentaField &P) const;
};
//Do nothing

View File

@ -99,7 +99,7 @@ public:
// using wilson flow by default here
WilsonFlow<PeriodicGimplR> WF(Pars.Smearing.steps, Pars.Smearing.step_size, Pars.Smearing.meas_interval);
WF.smear_adaptive(Usmear, U, Pars.Smearing.maxTau);
Real T0 = WF.energyDensityPlaquette(Pars.Smearing.maxTau, Usmear);
Real T0 = WF.energyDensityPlaquette(Usmear);
std::cout << GridLogMessage << std::setprecision(std::numeric_limits<Real>::digits10 + 1)
<< "T0 : [ " << traj << " ] "<< T0 << std::endl;
}

View File

@ -7,7 +7,6 @@ Source file: ./lib/qcd/modules/plaquette.h
Copyright (C) 2017
Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Christopher Kelly <ckelly@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -34,44 +33,28 @@ NAMESPACE_BEGIN(Grid);
template <class Gimpl>
class WilsonFlow: public Smear<Gimpl>{
public:
//Store generic measurements to take during smearing process using std::function
typedef std::function<void(int, RealD, const typename Gimpl::GaugeField &)> FunctionType; //int: step, RealD: flow time, GaugeField : the gauge field
private:
unsigned int Nstep;
RealD epsilon; //for regular smearing this is the time step, for adaptive it is the initial time step
std::vector< std::pair<int, FunctionType> > functions; //The int maps to the measurement frequency
unsigned int measure_interval;
mutable RealD epsilon, taus;
mutable WilsonGaugeAction<Gimpl> SG;
//Evolve the gauge field by 1 step and update tau
void evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const;
//Evolve the gauge field by 1 step and update tau and the current time step eps
void evolve_step_adaptive(typename Gimpl::GaugeField&U, RealD &tau, RealD &eps, RealD maxTau) const;
void evolve_step(typename Gimpl::GaugeField&) const;
void evolve_step_adaptive(typename Gimpl::GaugeField&, RealD);
RealD tau(unsigned int t)const {return epsilon*(t+1.0); }
public:
INHERIT_GIMPL_TYPES(Gimpl)
void resetActions(){ functions.clear(); }
void addMeasurement(int meas_interval, FunctionType meas){ functions.push_back({meas_interval, meas}); }
//Set the class to perform the default measurements:
//the plaquette energy density every step
//the plaquette topological charge every 'topq_meas_interval' steps
//and output to stdout
void setDefaultMeasurements(int topq_meas_interval = 1);
explicit WilsonFlow(unsigned int Nstep, RealD epsilon, unsigned int interval = 1):
Nstep(Nstep),
epsilon(epsilon),
measure_interval(interval),
SG(WilsonGaugeAction<Gimpl>(3.0)) {
// WilsonGaugeAction with beta 3.0
assert(epsilon > 0.0);
LogMessage();
setDefaultMeasurements(interval);
}
void LogMessage() {
@ -90,29 +73,9 @@ public:
// undefined for WilsonFlow
}
void smear_adaptive(GaugeField&, const GaugeField&, RealD maxTau) const;
//Compute t^2 <E(t)> for time t from the plaquette
static RealD energyDensityPlaquette(const RealD t, const GaugeField& U);
//Compute t^2 <E(t)> for time t from the 1x1 cloverleaf form
//t is the Wilson flow time
static RealD energyDensityCloverleaf(const RealD t, const GaugeField& U);
//Evolve the gauge field by Nstep steps of epsilon and return the energy density computed every interval steps
//The smeared field is output as V
std::vector<RealD> flowMeasureEnergyDensityPlaquette(GaugeField &V, const GaugeField& U, int measure_interval = 1);
//Version that does not return the smeared field
std::vector<RealD> flowMeasureEnergyDensityPlaquette(const GaugeField& U, int measure_interval = 1);
//Evolve the gauge field by Nstep steps of epsilon and return the Cloverleaf energy density computed every interval steps
//The smeared field is output as V
std::vector<RealD> flowMeasureEnergyDensityCloverleaf(GaugeField &V, const GaugeField& U, int measure_interval = 1);
//Version that does not return the smeared field
std::vector<RealD> flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval = 1);
void smear_adaptive(GaugeField&, const GaugeField&, RealD maxTau);
RealD energyDensityPlaquette(unsigned int step, const GaugeField& U) const;
RealD energyDensityPlaquette(const GaugeField& U) const;
};
@ -120,7 +83,7 @@ public:
// Implementations
////////////////////////////////////////////////////////////////////////////////
template <class Gimpl>
void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const{
void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U) const{
GaugeField Z(U.Grid());
GaugeField tmp(U.Grid());
SG.deriv(U, Z);
@ -136,13 +99,12 @@ void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U, RealD &tau) c
SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2
Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
Gimpl::update_field(Z, U, -2.0*epsilon); // V(t+e) = exp(ep*Z)*W2
tau += epsilon;
}
template <class Gimpl>
void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD &tau, RealD &eps, RealD maxTau) const{
if (maxTau - tau < eps){
eps = maxTau-tau;
void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD maxTau) {
if (maxTau - taus < epsilon){
epsilon = maxTau-taus;
}
//std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl;
GaugeField Z(U.Grid());
@ -152,151 +114,95 @@ void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, Real
SG.deriv(U, Z);
Zprime = -Z;
Z *= 0.25; // Z0 = 1/4 * F(U)
Gimpl::update_field(Z, U, -2.0*eps); // U = W1 = exp(ep*Z0)*W0
Gimpl::update_field(Z, U, -2.0*epsilon); // U = W1 = exp(ep*Z0)*W0
Z *= -17.0/8.0;
SG.deriv(U, tmp); Z += tmp; // -17/32*Z0 +Z1
Zprime += 2.0*tmp;
Z *= 8.0/9.0; // Z = -17/36*Z0 +8/9*Z1
Gimpl::update_field(Z, U, -2.0*eps); // U_= W2 = exp(ep*Z)*W1
Gimpl::update_field(Z, U, -2.0*epsilon); // U_= W2 = exp(ep*Z)*W1
Z *= -4.0/3.0;
SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2
Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
Gimpl::update_field(Z, U, -2.0*eps); // V(t+e) = exp(ep*Z)*W2
Gimpl::update_field(Z, U, -2.0*epsilon); // V(t+e) = exp(ep*Z)*W2
// Ramos
Gimpl::update_field(Zprime, Uprime, -2.0*eps); // V'(t+e) = exp(ep*Z')*W0
Gimpl::update_field(Zprime, Uprime, -2.0*epsilon); // V'(t+e) = exp(ep*Z')*W0
// Compute distance as norm^2 of the difference
GaugeField diffU = U - Uprime;
RealD diff = norm2(diffU);
// adjust integration step
tau += eps;
taus += epsilon;
//std::cout << GridLogMessage << "Adjusting integration step with distance: " << diff << std::endl;
eps = eps*0.95*std::pow(1e-4/diff,1./3.);
epsilon = epsilon*0.95*std::pow(1e-4/diff,1./3.);
//std::cout << GridLogMessage << "New epsilon : " << epsilon << std::endl;
}
template <class Gimpl>
RealD WilsonFlow<Gimpl>::energyDensityPlaquette(const RealD t, const GaugeField& U){
static WilsonGaugeAction<Gimpl> SG(3.0);
return 2.0 * t * t * SG.S(U)/U.Grid()->gSites();
}
//Compute t^2 <E(t)> for time from the 1x1 cloverleaf form
template <class Gimpl>
RealD WilsonFlow<Gimpl>::energyDensityCloverleaf(const RealD t, const GaugeField& U){
typedef typename Gimpl::GaugeLinkField GaugeMat;
typedef typename Gimpl::GaugeField GaugeLorentz;
assert(Nd == 4);
//E = 1/2 tr( F_munu F_munu )
//However as F_numu = -F_munu, only need to sum the trace of the squares of the following 6 field strengths:
//F_01 F_02 F_03 F_12 F_13 F_23
GaugeMat F(U.Grid());
LatticeComplexD R(U.Grid());
R = Zero();
for(int mu=0;mu<3;mu++){
for(int nu=mu+1;nu<4;nu++){
WilsonLoops<Gimpl>::FieldStrength(F, U, mu, nu);
R = R + trace(F*F);
}
}
ComplexD out = sum(R);
out = t*t*out / RealD(U.Grid()->gSites());
return -real(out); //minus sign necessary for +ve energy
}
template <class Gimpl>
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityPlaquette(GaugeField &V, const GaugeField& U, int measure_interval){
std::vector<RealD> out;
resetActions();
addMeasurement(measure_interval, [&out](int step, RealD t, const typename Gimpl::GaugeField &U){
std::cout << GridLogMessage << "[WilsonFlow] Computing plaquette energy density for step " << step << std::endl;
out.push_back( energyDensityPlaquette(t,U) );
});
smear(V,U);
return out;
RealD WilsonFlow<Gimpl>::energyDensityPlaquette(unsigned int step, const GaugeField& U) const {
RealD td = tau(step);
return 2.0 * td * td * SG.S(U)/U.Grid()->gSites();
}
template <class Gimpl>
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityPlaquette(const GaugeField& U, int measure_interval){
GaugeField V(U);
return flowMeasureEnergyDensityPlaquette(V,U, measure_interval);
RealD WilsonFlow<Gimpl>::energyDensityPlaquette(const GaugeField& U) const {
return 2.0 * taus * taus * SG.S(U)/U.Grid()->gSites();
}
template <class Gimpl>
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityCloverleaf(GaugeField &V, const GaugeField& U, int measure_interval){
std::vector<RealD> out;
resetActions();
addMeasurement(measure_interval, [&out](int step, RealD t, const typename Gimpl::GaugeField &U){
std::cout << GridLogMessage << "[WilsonFlow] Computing Cloverleaf energy density for step " << step << std::endl;
out.push_back( energyDensityCloverleaf(t,U) );
});
smear(V,U);
return out;
}
template <class Gimpl>
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval){
GaugeField V(U);
return flowMeasureEnergyDensityCloverleaf(V,U, measure_interval);
}
//#define WF_TIMING
template <class Gimpl>
void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const{
void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const {
out = in;
RealD taus = 0.;
for (unsigned int step = 1; step <= Nstep; step++) { //step indicates the number of smearing steps applied at the time of measurement
for (unsigned int step = 1; step <= Nstep; step++) {
auto start = std::chrono::high_resolution_clock::now();
evolve_step(out, taus);
evolve_step(out);
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> diff = end - start;
#ifdef WF_TIMING
std::cout << "Time to evolve " << diff.count() << " s\n";
#endif
//Perform measurements
for(auto const &meas : functions)
if( step % meas.first == 0 ) meas.second(step,taus,out);
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
<< step << " " << tau(step) << " "
<< energyDensityPlaquette(step,out) << std::endl;
if( step % measure_interval == 0){
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : "
<< step << " "
<< WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl;
}
}
}
template <class Gimpl>
void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau) const{
void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau){
out = in;
RealD taus = 0.;
RealD eps = epsilon;
taus = epsilon;
unsigned int step = 0;
do{
step++;
//std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl;
evolve_step_adaptive(out, taus, eps, maxTau);
//Perform measurements
for(auto const &meas : functions)
if( step % meas.first == 0 ) meas.second(step,taus,out);
evolve_step_adaptive(out, maxTau);
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
<< step << " " << taus << " "
<< energyDensityPlaquette(out) << std::endl;
if( step % measure_interval == 0){
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : "
<< step << " "
<< WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl;
}
} while (taus < maxTau);
}
template <class Gimpl>
void WilsonFlow<Gimpl>::setDefaultMeasurements(int topq_meas_interval){
addMeasurement(1, [](int step, RealD t, const typename Gimpl::GaugeField &U){
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " << step << " " << t << " " << energyDensityPlaquette(t,U) << std::endl;
});
addMeasurement(topq_meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " << WilsonLoops<Gimpl>::TopologicalCharge(U) << std::endl;
});
}
}
NAMESPACE_END(Grid);

View File

@ -88,12 +88,6 @@ namespace PeriodicBC {
return CovShiftBackward(Link,mu,arg);
}
//Boundary-aware C-shift of gauge links / gauge transformation matrices
template<class gauge> Lattice<gauge>
CshiftLink(const Lattice<gauge> &Link, int mu, int shift)
{
return Cshift(Link, mu, shift);
}
}
@ -164,9 +158,6 @@ namespace ConjugateBC {
// std::cout<<"Gparity::CovCshiftBackward mu="<<mu<<std::endl;
return Cshift(tmp,mu,-1);// moves towards positive mu
}
//Out(x) = U^dag_\mu(x-mu) | x_\mu != 0
// = U^T_\mu(L-1) | x_\mu == 0
template<class gauge> Lattice<gauge>
CovShiftIdentityBackward(const Lattice<gauge> &Link, int mu) {
GridBase *grid = Link.Grid();
@ -185,9 +176,6 @@ namespace ConjugateBC {
return Link;
}
//Out(x) = S_\mu(x+\hat\mu) | x_\mu != L-1
// = S*_\mu(0) | x_\mu == L-1
//Note: While this is used for Staples it is also applicable for shifting gauge links or gauge transformation matrices
template<class gauge> Lattice<gauge>
ShiftStaple(const Lattice<gauge> &Link, int mu)
{
@ -220,35 +208,6 @@ namespace ConjugateBC {
return CovShiftBackward(Link,mu,arg);
}
//Boundary-aware C-shift of gauge links / gauge transformation matrices
//shift = 1
//Out(x) = U_\mu(x+\hat\mu) | x_\mu != L-1
// = U*_\mu(0) | x_\mu == L-1
//shift = -1
//Out(x) = U_\mu(x-mu) | x_\mu != 0
// = U*_\mu(L-1) | x_\mu == 0
template<class gauge> Lattice<gauge>
CshiftLink(const Lattice<gauge> &Link, int mu, int shift)
{
GridBase *grid = Link.Grid();
int Lmu = grid->GlobalDimensions()[mu] - 1;
Lattice<iScalar<vInteger>> coor(grid);
LatticeCoordinate(coor, mu);
Lattice<gauge> tmp(grid);
if(shift == 1){
tmp = Cshift(Link, mu, 1);
tmp = where(coor == Lmu, conjugate(tmp), tmp);
return tmp;
}else if(shift == -1){
tmp = Link;
tmp = where(coor == Lmu, conjugate(tmp), tmp);
return Cshift(tmp, mu, -1);
}else assert(0 && "Invalid shift value");
return tmp; //shuts up the compiler fussing about the return type
}
}

View File

@ -40,45 +40,27 @@ public:
typedef typename Gimpl::GaugeLinkField GaugeMat;
typedef typename Gimpl::GaugeField GaugeLorentz;
//A_\mu(x) = -i Ta(U_\mu(x) ) where Ta(U) = 1/2( U - U^dag ) - 1/2N tr(U - U^dag) is the traceless antihermitian part. This is an O(A^3) approximation to the logarithm of U
static void GaugeLinkToLieAlgebraField(const GaugeMat &U, GaugeMat &A) {
Complex cmi(0.0,-1.0);
A = Ta(U) * cmi;
static void GaugeLinkToLieAlgebraField(const std::vector<GaugeMat> &U,std::vector<GaugeMat> &A) {
for(int mu=0;mu<Nd;mu++){
Complex cmi(0.0,-1.0);
A[mu] = Ta(U[mu]) * cmi;
}
}
//The derivative of the Lie algebra field
static void DmuAmu(const std::vector<GaugeMat> &U, GaugeMat &dmuAmu,int orthog) {
GridBase* grid = U[0].Grid();
GaugeMat Ax(grid);
GaugeMat Axm1(grid);
GaugeMat Utmp(grid);
static void DmuAmu(const std::vector<GaugeMat> &A,GaugeMat &dmuAmu,int orthog) {
dmuAmu=Zero();
for(int mu=0;mu<Nd;mu++){
if ( mu != orthog ) {
//Rather than define functionality to work out how the BCs apply to A_\mu we simply use the BC-aware Cshift to the gauge links and compute A_\mu(x) and A_\mu(x-1) separately
//Ax = A_\mu(x)
GaugeLinkToLieAlgebraField(U[mu], Ax);
//Axm1 = A_\mu(x_\mu-1)
Utmp = Gimpl::CshiftLink(U[mu], mu, -1);
GaugeLinkToLieAlgebraField(Utmp, Axm1);
//Derivative
dmuAmu = dmuAmu + Ax - Axm1;
dmuAmu = dmuAmu + A[mu] - Cshift(A[mu],mu,-1);
}
}
}
//Fix the gauge field Umu
//0 < alpha < 1 is related to the step size, cf https://arxiv.org/pdf/1405.5812.pdf
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1,bool err_on_no_converge=true) {
GridBase *grid = Umu.Grid();
GaugeMat xform(grid);
SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier,orthog,err_on_no_converge);
}
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,GaugeMat &xform,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1,bool err_on_no_converge=true) {
//Fix the gauge field Umu and also return the gauge transformation from the original gauge field, xform
GridBase *grid = Umu.Grid();
@ -141,25 +123,28 @@ public:
}
}
std::cout << GridLogError << "Gauge fixing did not converge in " << maxiter << " iterations." << std::endl;
if (err_on_no_converge)
assert(0 && "Gauge fixing did not converge within the specified number of iterations");
if (err_on_no_converge) assert(0);
};
static Real SteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform, Real alpha, GaugeMat & dmuAmu,int orthog) {
static Real SteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform,Real & alpha, GaugeMat & dmuAmu,int orthog) {
GridBase *grid = U[0].Grid();
std::vector<GaugeMat> A(Nd,grid);
GaugeMat g(grid);
ExpiAlphaDmuAmu(U,g,alpha,dmuAmu,orthog);
GaugeLinkToLieAlgebraField(U,A);
ExpiAlphaDmuAmu(A,g,alpha,dmuAmu,orthog);
Real vol = grid->gSites();
Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
xform = g*xform ;
SU<Nc>::GaugeTransform<Gimpl>(U,g);
SU<Nc>::GaugeTransform(U,g);
return trG;
}
static Real FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform, Real alpha, GaugeMat & dmuAmu,int orthog) {
static Real FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform,Real & alpha, GaugeMat & dmuAmu,int orthog) {
GridBase *grid = U[0].Grid();
@ -174,7 +159,11 @@ public:
GaugeMat g(grid);
GaugeMat dmuAmu_p(grid);
DmuAmu(U,dmuAmu,orthog);
std::vector<GaugeMat> A(Nd,grid);
GaugeLinkToLieAlgebraField(U,A);
DmuAmu(A,dmuAmu,orthog);
std::vector<int> mask(Nd,1);
for(int mu=0;mu<Nd;mu++) if (mu==orthog) mask[mu]=0;
@ -218,16 +207,16 @@ public:
Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
xform = g*xform ;
SU<Nc>::GaugeTransform<Gimpl>(U,g);
SU<Nc>::GaugeTransform(U,g);
return trG;
}
static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &U,GaugeMat &g, Real alpha, GaugeMat &dmuAmu,int orthog) {
static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu,int orthog) {
GridBase *grid = g.Grid();
Complex cialpha(0.0,-alpha);
GaugeMat ciadmam(grid);
DmuAmu(U,dmuAmu,orthog);
DmuAmu(A,dmuAmu,orthog);
ciadmam = dmuAmu*cialpha;
SU<Nc>::taExp(ciadmam,g);
}

View File

@ -694,32 +694,32 @@ public:
* Adjoint rep gauge xform
*/
template<typename Gimpl>
static void GaugeTransform(typename Gimpl::GaugeField &Umu, typename Gimpl::GaugeLinkField &g){
template<typename GaugeField,typename GaugeMat>
static void GaugeTransform( GaugeField &Umu, GaugeMat &g){
GridBase *grid = Umu.Grid();
conformable(grid,g.Grid());
typename Gimpl::GaugeLinkField U(grid);
typename Gimpl::GaugeLinkField ag(grid); ag = adj(g);
GaugeMat U(grid);
GaugeMat ag(grid); ag = adj(g);
for(int mu=0;mu<Nd;mu++){
U= PeekIndex<LorentzIndex>(Umu,mu);
U = g*U*Gimpl::CshiftLink(ag, mu, 1); //BC-aware
U = g*U*Cshift(ag, mu, 1);
PokeIndex<LorentzIndex>(Umu,U,mu);
}
}
template<typename Gimpl>
static void GaugeTransform( std::vector<typename Gimpl::GaugeLinkField> &U, typename Gimpl::GaugeLinkField &g){
template<typename GaugeMat>
static void GaugeTransform( std::vector<GaugeMat> &U, GaugeMat &g){
GridBase *grid = g.Grid();
typename Gimpl::GaugeLinkField ag(grid); ag = adj(g);
GaugeMat ag(grid); ag = adj(g);
for(int mu=0;mu<Nd;mu++){
U[mu] = g*U[mu]*Gimpl::CshiftLink(ag, mu, 1); //BC-aware
U[mu] = g*U[mu]*Cshift(ag, mu, 1);
}
}
template<typename Gimpl>
static void RandomGaugeTransform(GridParallelRNG &pRNG, typename Gimpl::GaugeField &Umu, typename Gimpl::GaugeLinkField &g){
template<typename GaugeField,typename GaugeMat>
static void RandomGaugeTransform(GridParallelRNG &pRNG, GaugeField &Umu, GaugeMat &g){
LieRandomize(pRNG,g,1.0);
GaugeTransform<Gimpl>(Umu,g);
GaugeTransform(Umu,g);
}
// Projects the algebra components a lattice matrix (of dimension ncol*ncol -1 )

View File

@ -125,57 +125,6 @@ public:
return sumplaq / vol / faces / Nc; // Nd , Nc dependent... FIXME
}
//////////////////////////////////////////////////
// sum over all spatial planes of plaquette
//////////////////////////////////////////////////
static void siteSpatialPlaquette(ComplexField &Plaq,
const std::vector<GaugeMat> &U) {
ComplexField sitePlaq(U[0].Grid());
Plaq = Zero();
for (int mu = 1; mu < Nd-1; mu++) {
for (int nu = 0; nu < mu; nu++) {
traceDirPlaquette(sitePlaq, U, mu, nu);
Plaq = Plaq + sitePlaq;
}
}
}
////////////////////////////////////
// sum over all x,y,z and over all spatial planes of plaquette
//////////////////////////////////////////////////
static std::vector<RealD> timesliceSumSpatialPlaquette(const GaugeLorentz &Umu) {
std::vector<GaugeMat> U(Nd, Umu.Grid());
// inefficient here
for (int mu = 0; mu < Nd; mu++) {
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
}
ComplexField Plaq(Umu.Grid());
siteSpatialPlaquette(Plaq, U);
typedef typename ComplexField::scalar_object sobj;
std::vector<sobj> Tq;
sliceSum(Plaq, Tq, Nd-1);
std::vector<Real> out(Tq.size());
for(int t=0;t<Tq.size();t++) out[t] = TensorRemove(Tq[t]).real();
return out;
}
//////////////////////////////////////////////////
// average over all x,y,z and over all spatial planes of plaquette
//////////////////////////////////////////////////
static std::vector<RealD> timesliceAvgSpatialPlaquette(const GaugeLorentz &Umu) {
std::vector<RealD> sumplaq = timesliceSumSpatialPlaquette(Umu);
int Lt = Umu.Grid()->FullDimensions()[Nd-1];
assert(sumplaq.size() == Lt);
double vol = Umu.Grid()->gSites() / Lt;
double faces = (1.0 * (Nd - 1)* (Nd - 2)) / 2.0;
for(int t=0;t<Lt;t++)
sumplaq[t] = sumplaq[t] / vol / faces / Nc; // Nd , Nc dependent... FIXME
return sumplaq;
}
//////////////////////////////////////////////////
// average over all x,y,z the temporal loop
//////////////////////////////////////////////////
@ -413,11 +362,11 @@ public:
GaugeMat u = PeekIndex<LorentzIndex>(Umu, mu); // some redundant copies
GaugeMat vu = v*u;
//FS = 0.25*Ta(u*v + Cshift(vu, mu, -1));
FS = (u*v + Gimpl::CshiftLink(vu, mu, -1));
FS = (u*v + Cshift(vu, mu, -1));
FS = 0.125*(FS - adj(FS));
}
static Real TopologicalCharge(const GaugeLorentz &U){
static Real TopologicalCharge(GaugeLorentz &U){
// 4d topological charge
assert(Nd==4);
// Bx = -iF(y,z), By = -iF(z,y), Bz = -iF(x,y)
@ -440,203 +389,6 @@ public:
}
//Clover-leaf Wilson loop combination for arbitrary mu-extent M and nu extent N, mu >= nu
//cf https://arxiv.org/pdf/hep-lat/9701012.pdf Eq 7 for 1x2 Wilson loop
//Clockwise ordering
static void CloverleafMxN(GaugeMat &FS, const GaugeMat &Umu, const GaugeMat &Unu, int mu, int nu, int M, int N){
#define Fmu(A) Gimpl::CovShiftForward(Umu, mu, A)
#define Bmu(A) Gimpl::CovShiftBackward(Umu, mu, A)
#define Fnu(A) Gimpl::CovShiftForward(Unu, nu, A)
#define Bnu(A) Gimpl::CovShiftBackward(Unu, nu, A)
#define FmuI Gimpl::CovShiftIdentityForward(Umu, mu)
#define BmuI Gimpl::CovShiftIdentityBackward(Umu, mu)
#define FnuI Gimpl::CovShiftIdentityForward(Unu, nu)
#define BnuI Gimpl::CovShiftIdentityBackward(Unu, nu)
//Upper right loop
GaugeMat tmp = BmuI;
for(int i=1;i<M;i++)
tmp = Bmu(tmp);
for(int j=0;j<N;j++)
tmp = Bnu(tmp);
for(int i=0;i<M;i++)
tmp = Fmu(tmp);
for(int j=0;j<N;j++)
tmp = Fnu(tmp);
FS = tmp;
//Upper left loop
tmp = BnuI;
for(int j=1;j<N;j++)
tmp = Bnu(tmp);
for(int i=0;i<M;i++)
tmp = Fmu(tmp);
for(int j=0;j<N;j++)
tmp = Fnu(tmp);
for(int i=0;i<M;i++)
tmp = Bmu(tmp);
FS = FS + tmp;
//Lower right loop
tmp = FnuI;
for(int j=1;j<N;j++)
tmp = Fnu(tmp);
for(int i=0;i<M;i++)
tmp = Bmu(tmp);
for(int j=0;j<N;j++)
tmp = Bnu(tmp);
for(int i=0;i<M;i++)
tmp = Fmu(tmp);
FS = FS + tmp;
//Lower left loop
tmp = FmuI;
for(int i=1;i<M;i++)
tmp = Fmu(tmp);
for(int j=0;j<N;j++)
tmp = Fnu(tmp);
for(int i=0;i<M;i++)
tmp = Bmu(tmp);
for(int j=0;j<N;j++)
tmp = Bnu(tmp);
FS = FS + tmp;
#undef Fmu
#undef Bmu
#undef Fnu
#undef Bnu
#undef FmuI
#undef BmuI
#undef FnuI
#undef BnuI
}
//Field strength from MxN Wilson loop
//Note F_numu = - F_munu
static void FieldStrengthMxN(GaugeMat &FS, const GaugeLorentz &U, int mu, int nu, int M, int N){
GaugeMat Umu = PeekIndex<LorentzIndex>(U, mu);
GaugeMat Unu = PeekIndex<LorentzIndex>(U, nu);
if(M == N){
GaugeMat F(Umu.Grid());
CloverleafMxN(F, Umu, Unu, mu, nu, M, N);
FS = 0.125 * ( F - adj(F) );
}else{
//Average over both orientations
GaugeMat horizontal(Umu.Grid()), vertical(Umu.Grid());
CloverleafMxN(horizontal, Umu, Unu, mu, nu, M, N);
CloverleafMxN(vertical, Umu, Unu, mu, nu, N, M);
FS = 0.0625 * ( horizontal - adj(horizontal) + vertical - adj(vertical) );
}
}
//Topological charge contribution from MxN Wilson loops
//cf https://arxiv.org/pdf/hep-lat/9701012.pdf Eq 6
//output is the charge by timeslice: sum over timeslices to obtain the total
static std::vector<Real> TimesliceTopologicalChargeMxN(const GaugeLorentz &U, int M, int N){
assert(Nd == 4);
std::vector<std::vector<GaugeMat*> > F(Nd,std::vector<GaugeMat*>(Nd,nullptr));
//Note F_numu = - F_munu
//hence we only need to loop over mu,nu,rho,sigma that aren't related by permuting mu,nu or rho,sigma
//Use nu > mu
for(int mu=0;mu<Nd-1;mu++){
for(int nu=mu+1; nu<Nd; nu++){
F[mu][nu] = new GaugeMat(U.Grid());
FieldStrengthMxN(*F[mu][nu], U, mu, nu, M, N);
}
}
Real coeff = -1./(32 * M_PI*M_PI * M*M * N*N); //overall sign to match CPS and Grid conventions, possibly related to time direction = 3 vs 0
static const int combs[3][4] = { {0,1,2,3}, {0,2,1,3}, {0,3,1,2} };
static const int signs[3] = { 1, -1, 1 }; //epsilon_{mu nu rho sigma}
ComplexField fsum(U.Grid());
fsum = Zero();
for(int c=0;c<3;c++){
int mu = combs[c][0], nu = combs[c][1], rho = combs[c][2], sigma = combs[c][3];
int eps = signs[c];
fsum = fsum + (8. * coeff * eps) * trace( (*F[mu][nu]) * (*F[rho][sigma]) );
}
for(int mu=0;mu<Nd-1;mu++)
for(int nu=mu+1; nu<Nd; nu++)
delete F[mu][nu];
typedef typename ComplexField::scalar_object sobj;
std::vector<sobj> Tq;
sliceSum(fsum, Tq, Nd-1);
std::vector<Real> out(Tq.size());
for(int t=0;t<Tq.size();t++) out[t] = TensorRemove(Tq[t]).real();
return out;
}
static Real TopologicalChargeMxN(const GaugeLorentz &U, int M, int N){
std::vector<Real> Tq = TimesliceTopologicalChargeMxN(U,M,N);
Real out(0);
for(int t=0;t<Tq.size();t++) out += Tq[t];
return out;
}
//Generate the contributions to the 5Li topological charge from Wilson loops of the following sizes
//Use coefficients from hep-lat/9701012
//1x1 : c1=(19.-55.*c5)/9.
//2x2 : c2=(1-64.*c5)/9.
//1x2 : c3=(-64.+640.*c5)/45.
//1x3 : c4=1./5.-2.*c5
//3x3 : c5=1./20.
//Output array outer index contains the loops in the above order
//Inner index is the time coordinate
static std::vector<std::vector<Real> > TimesliceTopologicalCharge5LiContributions(const GaugeLorentz &U){
static const int exts[5][2] = { {1,1}, {2,2}, {1,2}, {1,3}, {3,3} };
std::vector<std::vector<Real> > out(5);
for(int i=0;i<5;i++){
out[i] = TimesliceTopologicalChargeMxN(U,exts[i][0],exts[i][1]);
}
return out;
}
static std::vector<Real> TopologicalCharge5LiContributions(const GaugeLorentz &U){
static const int exts[5][2] = { {1,1}, {2,2}, {1,2}, {1,3}, {3,3} };
std::vector<Real> out(5);
std::cout << GridLogMessage << "Computing topological charge" << std::endl;
for(int i=0;i<5;i++){
out[i] = TopologicalChargeMxN(U,exts[i][0],exts[i][1]);
std::cout << GridLogMessage << exts[i][0] << "x" << exts[i][1] << " Wilson loop contribution " << out[i] << std::endl;
}
return out;
}
//Compute the 5Li topological charge
static std::vector<Real> TimesliceTopologicalCharge5Li(const GaugeLorentz &U){
std::vector<std::vector<Real> > loops = TimesliceTopologicalCharge5LiContributions(U);
double c5=1./20.;
double c4=1./5.-2.*c5;
double c3=(-64.+640.*c5)/45.;
double c2=(1-64.*c5)/9.;
double c1=(19.-55.*c5)/9.;
int Lt = loops[0].size();
std::vector<Real> out(Lt,0.);
for(int t=0;t<Lt;t++)
out[t] += c1*loops[0][t] + c2*loops[1][t] + c3*loops[2][t] + c4*loops[3][t] + c5*loops[4][t];
return out;
}
static Real TopologicalCharge5Li(const GaugeLorentz &U){
std::vector<Real> Qt = TimesliceTopologicalCharge5Li(U);
Real Q = 0.;
for(int t=0;t<Qt.size();t++) Q += Qt[t];
std::cout << GridLogMessage << "5Li Topological charge: " << Q << std::endl;
return Q;
}
//////////////////////////////////////////////////////
// Similar to above for rectangle is required
//////////////////////////////////////////////////////

View File

@ -26,7 +26,7 @@
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP))
#ifndef GRID_HIP
NAMESPACE_BEGIN(Grid);
@ -82,7 +82,7 @@ void JSONWriter::writeDefault(const std::string &s, const std::string &x)
if (s.size())
ss_ << "\""<< s << "\" : \"" << os.str() << "\" ," ;
else
ss_ << os.str() << " ," ;
ss_ << "\""<< os.str() << "\" ," ;
}
// Reader implementation ///////////////////////////////////////////////////////

View File

@ -54,7 +54,7 @@ namespace Grid
void pop(void);
template <typename U>
void writeDefault(const std::string &s, const U &x);
#ifdef __NVCC__
#if defined(GRID_CUDA) || defined(GRID_HIP)
void writeDefault(const std::string &s, const Grid::ComplexD &x)
{
std::complex<double> z(real(x),imag(x));
@ -101,7 +101,7 @@ namespace Grid
void readDefault(const std::string &s, std::vector<U> &output);
template <typename U, typename P>
void readDefault(const std::string &s, std::pair<U,P> &output);
#ifdef __NVCC__
#if defined(GRID_CUDA) || defined(GRID_HIP)
void readDefault(const std::string &s, ComplexD &output)
{
std::complex<double> z;

View File

@ -36,7 +36,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include "BinaryIO.h"
#include "TextIO.h"
#include "XmlIO.h"
#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP))
#ifndef GRID_HIP
#include "JSON_IO.h"
#endif

View File

@ -52,11 +52,6 @@ public:
return arg;
}
};
class SimpleStencilParams{
public:
Coordinate dirichlet;
SimpleStencilParams() {};
};
NAMESPACE_END(Grid);

View File

@ -80,11 +80,14 @@ void Gather_plane_simple_table (commVector<std::pair<int,int> >& table,const Lat
///////////////////////////////////////////////////////////////////
template<class cobj,class vobj,class compressor>
void Gather_plane_exchange_table(const Lattice<vobj> &rhs,
commVector<cobj *> pointers,int dimension,int plane,int cbmask,compressor &compress,int type) __attribute__((noinline));
commVector<cobj *> pointers,
int dimension,int plane,
int cbmask,compressor &compress,int type) __attribute__((noinline));
template<class cobj,class vobj,class compressor>
void Gather_plane_exchange_table(commVector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,
Vector<cobj *> pointers,int dimension,int plane,int cbmask,
void Gather_plane_exchange_table(commVector<std::pair<int,int> >& table,
const Lattice<vobj> &rhs,
std::vector<cobj *> &pointers,int dimension,int plane,int cbmask,
compressor &compress,int type)
{
assert( (table.size()&0x1)==0);
@ -92,14 +95,15 @@ void Gather_plane_exchange_table(commVector<std::pair<int,int> >& table,const La
int so = plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
auto rhs_v = rhs.View(AcceleratorRead);
auto rhs_p = &rhs_v[0];
auto p0=&pointers[0][0];
auto p1=&pointers[1][0];
auto tp=&table[0];
accelerator_forNB(j, num, vobj::Nsimd(), {
compress.CompressExchange(p0,p1, &rhs_v[0], j,
so+tp[2*j ].second,
so+tp[2*j+1].second,
type);
compress.CompressExchange(p0,p1, rhs_p, j,
so+tp[2*j ].second,
so+tp[2*j+1].second,
type);
});
rhs_v.ViewClose();
}
@ -133,8 +137,6 @@ class CartesianStencilAccelerator {
int _osites;
StencilVector _directions;
StencilVector _distances;
StencilVector _comms_send;
StencilVector _comms_recv;
StencilVector _comm_buf_size;
StencilVector _permute_type;
StencilVector same_node;
@ -228,14 +230,12 @@ public:
void * recv_buf;
Integer to_rank;
Integer from_rank;
Integer do_send;
Integer do_recv;
Integer bytes;
};
struct Merge {
cobj * mpointer;
Vector<scalar_object *> rpointers;
Vector<cobj *> vpointers;
// std::vector<scalar_object *> rpointers;
std::vector<cobj *> vpointers;
Integer buffer_size;
Integer type;
};
@ -244,20 +244,7 @@ public:
cobj * mpi_p;
Integer buffer_size;
};
struct CopyReceiveBuffer {
void * from_p;
void * to_p;
Integer bytes;
};
struct CachedTransfer {
Integer direction;
Integer OrthogPlane;
Integer DestProc;
Integer bytes;
Integer lane;
Integer cb;
void *recv_buf;
};
protected:
GridBase * _grid;
@ -288,8 +275,7 @@ public:
std::vector<Merge> MergersSHM;
std::vector<Decompress> Decompressions;
std::vector<Decompress> DecompressionsSHM;
std::vector<CopyReceiveBuffer> CopyReceiveBuffers ;
std::vector<CachedTransfer> CachedTransfers;
///////////////////////////////////////////////////////////
// Unified Comms buffers for all directions
///////////////////////////////////////////////////////////
@ -302,6 +288,29 @@ public:
int u_comm_offset;
int _unified_buffer_size;
/////////////////////////////////////////
// Timing info; ugly; possibly temporary
/////////////////////////////////////////
double commtime;
double mpi3synctime;
double mpi3synctime_g;
double shmmergetime;
double gathertime;
double gathermtime;
double halogtime;
double mergetime;
double decompresstime;
double comms_bytes;
double shm_bytes;
double splicetime;
double nosplicetime;
double calls;
std::vector<double> comm_bytes_thr;
std::vector<double> shm_bytes_thr;
std::vector<double> comm_time_thr;
std::vector<double> comm_enter_thr;
std::vector<double> comm_leave_thr;
////////////////////////////////////////
// Stencil query
////////////////////////////////////////
@ -328,12 +337,11 @@ public:
//////////////////////////////////////////
// Comms packet queue for asynch thread
// Use OpenMP Tasks for cleaner ???
// must be called *inside* parallel region
//////////////////////////////////////////
/*
void CommunicateThreaded()
{
#ifdef GRID_OMP
// must be called in parallel region
int mythread = omp_get_thread_num();
int nthreads = CartesianCommunicator::nCommThreads;
#else
@ -342,30 +350,67 @@ public:
#endif
if (nthreads == -1) nthreads = 1;
if (mythread < nthreads) {
comm_enter_thr[mythread] = usecond();
for (int i = mythread; i < Packets.size(); i += nthreads) {
uint64_t bytes = _grid->StencilSendToRecvFrom(Packets[i].send_buf,
Packets[i].to_rank,
Packets[i].recv_buf,
Packets[i].from_rank,
Packets[i].bytes,i);
comm_bytes_thr[mythread] += bytes;
shm_bytes_thr[mythread] += 2*Packets[i].bytes-bytes; // Send + Recv.
}
comm_leave_thr[mythread]= usecond();
comm_time_thr[mythread] += comm_leave_thr[mythread] - comm_enter_thr[mythread];
}
}
*/
void CollateThreads(void)
{
int nthreads = CartesianCommunicator::nCommThreads;
double first=0.0;
double last =0.0;
for(int t=0;t<nthreads;t++) {
double t0 = comm_enter_thr[t];
double t1 = comm_leave_thr[t];
comms_bytes+=comm_bytes_thr[t];
shm_bytes +=shm_bytes_thr[t];
comm_enter_thr[t] = 0.0;
comm_leave_thr[t] = 0.0;
comm_time_thr[t] = 0.0;
comm_bytes_thr[t]=0;
shm_bytes_thr[t]=0;
if ( first == 0.0 ) first = t0; // first is t0
if ( (t0 > 0.0) && ( t0 < first ) ) first = t0; // min time seen
if ( t1 > last ) last = t1; // max time seen
}
commtime+= last-first;
}
////////////////////////////////////////////////////////////////////////
// Non blocking send and receive. Necessarily parallel.
////////////////////////////////////////////////////////////////////////
void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs)
{
reqs.resize(Packets.size());
commtime-=usecond();
for(int i=0;i<Packets.size();i++){
_grid->StencilSendToRecvFromBegin(reqs[i],
Packets[i].send_buf,
Packets[i].to_rank,Packets[i].do_send,
Packets[i].recv_buf,
Packets[i].from_rank,Packets[i].do_recv,
Packets[i].bytes,i);
uint64_t bytes=_grid->StencilSendToRecvFromBegin(reqs[i],
Packets[i].send_buf,
Packets[i].to_rank,
Packets[i].recv_buf,
Packets[i].from_rank,
Packets[i].bytes,i);
comms_bytes+=bytes;
shm_bytes +=2*Packets[i].bytes-bytes;
}
_grid->StencilBarrier();// Synch shared memory on a single nodes
}
void CommunicateComplete(std::vector<std::vector<CommsRequest_t> > &reqs)
@ -373,34 +418,36 @@ public:
for(int i=0;i<Packets.size();i++){
_grid->StencilSendToRecvFromComplete(reqs[i],i);
}
commtime+=usecond();
}
////////////////////////////////////////////////////////////////////////
// Blocking send and receive. Either sequential or parallel.
////////////////////////////////////////////////////////////////////////
void Communicate(void)
{
if ( CartesianCommunicator::CommunicatorPolicy == CartesianCommunicator::CommunicatorPolicySequential ){
/////////////////////////////////////////////////////////
// several way threaded on different communicators.
// Cannot combine with Dirichlet operators
// This scheme is needed on Intel Omnipath for best performance
// Deprecate once there are very few omnipath clusters
/////////////////////////////////////////////////////////
int nthreads = CartesianCommunicator::nCommThreads;
int old = GridThread::GetThreads();
GridThread::SetThreads(nthreads);
thread_for(i,Packets.size(),{
_grid->StencilSendToRecvFrom(Packets[i].send_buf,
Packets[i].to_rank,Packets[i].do_send,
Packets[i].recv_buf,
Packets[i].from_rank,Packets[i].do_recv,
Packets[i].bytes,i);
});
GridThread::SetThreads(old);
} else {
/////////////////////////////////////////////////////////
// Concurrent and non-threaded asynch calls to MPI
/////////////////////////////////////////////////////////
if ( 0 ){
thread_region {
// must be called in parallel region
int mythread = thread_num();
int maxthreads= thread_max();
int nthreads = CartesianCommunicator::nCommThreads;
assert(nthreads <= maxthreads);
if (nthreads == -1) nthreads = 1;
if (mythread < nthreads) {
for (int i = mythread; i < Packets.size(); i += nthreads) {
double start = usecond();
uint64_t bytes= _grid->StencilSendToRecvFrom(Packets[i].send_buf,
Packets[i].to_rank,
Packets[i].recv_buf,
Packets[i].from_rank,
Packets[i].bytes,i);
comm_bytes_thr[mythread] += bytes;
shm_bytes_thr[mythread] += Packets[i].bytes - bytes;
comm_time_thr[mythread] += usecond() - start;
}
}
}
} else { // Concurrent and non-threaded asynch calls to MPI
std::vector<std::vector<CommsRequest_t> > reqs;
this->CommunicateBegin(reqs);
this->CommunicateComplete(reqs);
@ -442,23 +489,31 @@ public:
sshift[1] = _grid->CheckerBoardShiftForCB(this->_checkerboard,dimension,shift,Odd);
if ( sshift[0] == sshift[1] ) {
if (splice_dim) {
auto tmp = GatherSimd(source,dimension,shift,0x3,compress,face_idx,point);
splicetime-=usecond();
auto tmp = GatherSimd(source,dimension,shift,0x3,compress,face_idx);
is_same_node = is_same_node && tmp;
splicetime+=usecond();
} else {
auto tmp = Gather(source,dimension,shift,0x3,compress,face_idx,point);
nosplicetime-=usecond();
auto tmp = Gather(source,dimension,shift,0x3,compress,face_idx);
is_same_node = is_same_node && tmp;
nosplicetime+=usecond();
}
} else {
if(splice_dim){
splicetime-=usecond();
// if checkerboard is unfavourable take two passes
// both with block stride loop iteration
auto tmp1 = GatherSimd(source,dimension,shift,0x1,compress,face_idx,point);
auto tmp2 = GatherSimd(source,dimension,shift,0x2,compress,face_idx,point);
auto tmp1 = GatherSimd(source,dimension,shift,0x1,compress,face_idx);
auto tmp2 = GatherSimd(source,dimension,shift,0x2,compress,face_idx);
is_same_node = is_same_node && tmp1 && tmp2;
splicetime+=usecond();
} else {
auto tmp1 = Gather(source,dimension,shift,0x1,compress,face_idx,point);
auto tmp2 = Gather(source,dimension,shift,0x2,compress,face_idx,point);
nosplicetime-=usecond();
auto tmp1 = Gather(source,dimension,shift,0x1,compress,face_idx);
auto tmp2 = Gather(source,dimension,shift,0x2,compress,face_idx);
is_same_node = is_same_node && tmp1 && tmp2;
nosplicetime+=usecond();
}
}
}
@ -468,10 +523,13 @@ public:
template<class compressor>
void HaloGather(const Lattice<vobj> &source,compressor &compress)
{
mpi3synctime_g-=usecond();
_grid->StencilBarrier();// Synch shared memory on a single nodes
mpi3synctime_g+=usecond();
// conformable(source.Grid(),_grid);
assert(source.Grid()==_grid);
halogtime-=usecond();
u_comm_offset=0;
@ -485,6 +543,7 @@ public:
assert(u_comm_offset==_unified_buffer_size);
accelerator_barrier();
halogtime+=usecond();
}
/////////////////////////
@ -497,70 +556,14 @@ public:
Mergers.resize(0);
MergersSHM.resize(0);
Packets.resize(0);
CopyReceiveBuffers.resize(0);
CachedTransfers.resize(0);
calls++;
}
void AddCopy(void *from,void * to, Integer bytes)
{
CopyReceiveBuffer obj;
obj.from_p = from;
obj.to_p = to;
obj.bytes= bytes;
CopyReceiveBuffers.push_back(obj);
}
void CommsCopy()
{
// These are device resident MPI buffers.
for(int i=0;i<CopyReceiveBuffers.size();i++){
cobj *from=(cobj *)CopyReceiveBuffers[i].from_p;
cobj *to =(cobj *)CopyReceiveBuffers[i].to_p;
Integer words = CopyReceiveBuffers[i].bytes/sizeof(cobj);
accelerator_forNB(j, words, cobj::Nsimd(), {
coalescedWrite(to[j] ,coalescedRead(from [j]));
});
}
}
Integer CheckForDuplicate(Integer direction, Integer OrthogPlane, Integer DestProc, void *recv_buf,Integer lane,Integer bytes,Integer cb)
{
CachedTransfer obj;
obj.direction = direction;
obj.OrthogPlane = OrthogPlane;
obj.DestProc = DestProc;
obj.recv_buf = recv_buf;
obj.lane = lane;
obj.bytes = bytes;
obj.cb = cb;
for(int i=0;i<CachedTransfers.size();i++){
if ( (CachedTransfers[i].direction ==direction)
&&(CachedTransfers[i].OrthogPlane==OrthogPlane)
&&(CachedTransfers[i].DestProc ==DestProc)
&&(CachedTransfers[i].bytes ==bytes)
&&(CachedTransfers[i].lane ==lane)
&&(CachedTransfers[i].cb ==cb)
){
AddCopy(CachedTransfers[i].recv_buf,recv_buf,bytes);
return 1;
}
}
CachedTransfers.push_back(obj);
return 0;
}
void AddPacket(void *xmit,void * rcv,
Integer to, Integer do_send,
Integer from, Integer do_recv,
Integer bytes){
void AddPacket(void *xmit,void * rcv, Integer to,Integer from,Integer bytes){
Packet p;
p.send_buf = xmit;
p.recv_buf = rcv;
p.to_rank = to;
p.from_rank= from;
p.do_send = do_send;
p.do_recv = do_recv;
p.bytes = bytes;
Packets.push_back(p);
}
@ -571,7 +574,7 @@ public:
d.buffer_size = buffer_size;
dv.push_back(d);
}
void AddMerge(cobj *merge_p,Vector<cobj *> &rpointers,Integer buffer_size,Integer type,std::vector<Merge> &mv) {
void AddMerge(cobj *merge_p,std::vector<cobj *> &rpointers,Integer buffer_size,Integer type,std::vector<Merge> &mv) {
Merge m;
m.type = type;
m.mpointer = merge_p;
@ -580,17 +583,23 @@ public:
mv.push_back(m);
}
template<class decompressor> void CommsMerge(decompressor decompress) {
CommsCopy();
CommsMerge(decompress,Mergers,Decompressions);
}
template<class decompressor> void CommsMergeSHM(decompressor decompress) {
mpi3synctime-=usecond();
accelerator_barrier();
_grid->StencilBarrier();// Synch shared memory on a single nodes
mpi3synctime+=usecond();
shmmergetime-=usecond();
CommsMerge(decompress,MergersSHM,DecompressionsSHM);
shmmergetime+=usecond();
}
template<class decompressor>
void CommsMerge(decompressor decompress,std::vector<Merge> &mm,std::vector<Decompress> &dd)
{
void CommsMerge(decompressor decompress,std::vector<Merge> &mm,std::vector<Decompress> &dd) {
mergetime-=usecond();
for(int i=0;i<mm.size();i++){
auto mp = &mm[i].mpointer[0];
auto vp0= &mm[i].vpointers[0][0];
@ -600,7 +609,9 @@ public:
decompress.Exchange(mp,vp0,vp1,type,o);
});
}
mergetime+=usecond();
decompresstime-=usecond();
for(int i=0;i<dd.size();i++){
auto kp = dd[i].kernel_p;
auto mp = dd[i].mpi_p;
@ -608,6 +619,7 @@ public:
decompress.Decompress(kp,mp,o);
});
}
decompresstime+=usecond();
}
////////////////////////////////////////
// Set up routines
@ -640,60 +652,22 @@ public:
}
}
if(local == 0) {
for(int s=0;s<Ls;s++){
surface_list.push_back(site*Ls+s);
}
}
}
}
/// Introduce a block structure and switch off comms on boundaries
void DirichletBlock(const Coordinate &dirichlet_block)
{
for(int ii=0;ii<this->_npoints;ii++){
int dimension = this->_directions[ii];
int displacement = this->_distances[ii];
int gd = _grid->_gdimensions[dimension];
int fd = _grid->_fdimensions[dimension];
int pd = _grid->_processors [dimension];
int pc = _grid->_processor_coor[dimension];
int ld = fd/pd;
///////////////////////////////////////////
// Figure out dirichlet send and receive
// on this leg of stencil.
///////////////////////////////////////////
int comm_dim = _grid->_processors[dimension] >1 ;
int block = dirichlet_block[dimension];
this->_comms_send[ii] = comm_dim;
this->_comms_recv[ii] = comm_dim;
if ( block && comm_dim ) {
assert(abs(displacement) < ld );
// Quiesce communication across block boundaries
if( displacement > 0 ) {
// High side, low side
// | <--B--->|
// | | |
// noR
// noS
if ( ( (ld*(pc+1) ) % block ) == 0 ) this->_comms_recv[ii] = 0;
if ( ( (ld*pc ) % block ) == 0 ) this->_comms_send[ii] = 0;
} else {
// High side, low side
// | <--B--->|
// | | |
// noS
// noR
if ( ( (ld*(pc+1) ) % block ) == 0 ) this->_comms_send[ii] = 0;
if ( ( (ld*pc ) % block ) == 0 ) this->_comms_recv[ii] = 0;
}
surface_list.push_back(site);
}
}
}
CartesianStencil(GridBase *grid,
int npoints,
int checkerboard,
const std::vector<int> &directions,
const std::vector<int> &distances,
Parameters p)
: shm_bytes_thr(npoints),
comm_bytes_thr(npoints),
comm_enter_thr(npoints),
comm_leave_thr(npoints),
comm_time_thr(npoints)
{
face_table_computed=0;
_grid = grid;
@ -707,12 +681,8 @@ public:
this->_simd_layout = _grid->_simd_layout; // copy simd_layout to give access to Accelerator Kernels
this->_directions = StencilVector(directions);
this->_distances = StencilVector(distances);
this->_comms_send.resize(npoints);
this->_comms_recv.resize(npoints);
this->same_node.resize(npoints);
if ( p.dirichlet.size() ) DirichletBlock(p.dirichlet); // comms send/recv set up
_unified_buffer_size=0;
surface_list.resize(0);
@ -729,16 +699,15 @@ public:
int displacement = distances[i];
int shift = displacement;
int gd = _grid->_gdimensions[dimension];
int fd = _grid->_fdimensions[dimension];
int pd = _grid->_processors [dimension];
// int ld = gd/pd;
int rd = _grid->_rdimensions[dimension];
int pc = _grid->_processor_coor[dimension];
this->_permute_type[point]=_grid->PermuteType(dimension);
this->_checkerboard = checkerboard;
//////////////////////////
// the permute type
//////////////////////////
int simd_layout = _grid->_simd_layout[dimension];
int comm_dim = _grid->_processors[dimension] >1 ;
int splice_dim = _grid->_simd_layout[dimension]>1 && (comm_dim);
@ -747,6 +716,7 @@ public:
assert ( (rotate_dim && comm_dim) == false) ; // Do not think spread out is supported
int sshift[2];
//////////////////////////
// Underlying approach. For each local site build
// up a table containing the npoint "neighbours" and whether they
@ -847,7 +817,6 @@ public:
GridBase *grid=_grid;
const int Nsimd = grid->Nsimd();
int comms_recv = this->_comms_recv[point];
int fd = _grid->_fdimensions[dimension];
int ld = _grid->_ldimensions[dimension];
int rd = _grid->_rdimensions[dimension];
@ -873,14 +842,12 @@ public:
for(int x=0;x<rd;x++){
int permute_type=grid->PermuteType(dimension);
int permute_slice;
int sx = (x+sshift)%rd;
int offnode = 0;
if ( simd_layout > 1 ) {
permute_slice=1;
for(int i=0;i<Nsimd;i++){
int inner_bit = (Nsimd>>(permute_type+1));
@ -897,7 +864,6 @@ public:
} else {
int comm_proc = ((x+sshift)/rd)%pd;
offnode = (comm_proc!= 0);
permute_slice=0;
}
int wraparound=0;
@ -907,31 +873,25 @@ public:
if ( (shiftpm== 1) && (sx<x) && (grid->_processor_coor[dimension]==grid->_processors[dimension]-1) ) {
wraparound = 1;
}
if (!offnode) {
// Wrap locally dirichlet support case OR node local
if ( offnode==0 ) {
permute_slice=0;
int permute_slice=0;
CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
} else {
if ( comms_recv ) {
ScatterPlane(point,dimension,x,cbmask,_unified_buffer_size,wraparound); // permute/extract/merge is done in comms phase
} else {
CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
}
}
if ( offnode ) {
int words = buffer_size;
if (cbmask != 0x3) words=words>>1;
// int rank = grid->_processor;
// int recv_from_rank;
// int xmit_to_rank;
int unified_buffer_offset = _unified_buffer_size;
_unified_buffer_size += words;
ScatterPlane(point,dimension,x,cbmask,unified_buffer_offset,wraparound); // permute/extract/merge is done in comms phase
}
}
}
@ -1030,14 +990,11 @@ public:
}
template<class compressor>
int Gather(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor & compress,int &face_idx, int point)
int Gather(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor & compress,int &face_idx)
{
typedef typename cobj::vector_type vector_type;
typedef typename cobj::scalar_type scalar_type;
int comms_send = this->_comms_send[point] ;
int comms_recv = this->_comms_recv[point] ;
assert(rhs.Grid()==_grid);
// conformable(_grid,rhs.Grid());
@ -1060,93 +1017,78 @@ public:
int sx = (x+sshift)%rd;
int comm_proc = ((x+sshift)/rd)%pd;
if (comm_proc) {
int words = buffer_size;
if (cbmask != 0x3) words=words>>1;
int bytes = words * compress.CommDatumSize();
int so = sx*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
int comm_off = u_comm_offset;
if ( !face_table_computed ) {
face_table.resize(face_idx+1);
std::vector<std::pair<int,int> > face_table_host ;
Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,face_table_host);
face_table[face_idx].resize(face_table_host.size());
acceleratorCopyToDevice(&face_table_host[0],
&face_table[face_idx][0],
face_table[face_idx].size()*sizeof(face_table_host[0]));
}
// int rank = _grid->_processor;
int recv_from_rank;
int xmit_to_rank;
cobj *recv_buf;
cobj *send_buf;
_grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
assert (xmit_to_rank != _grid->ThisRank());
assert (recv_from_rank != _grid->ThisRank());
if( comms_send ) {
if ( !face_table_computed ) {
face_table.resize(face_idx+1);
std::vector<std::pair<int,int> > face_table_host ;
Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,comm_off,face_table_host);
face_table[face_idx].resize(face_table_host.size());
acceleratorCopyToDevice(&face_table_host[0],
&face_table[face_idx][0],
face_table[face_idx].size()*sizeof(face_table_host[0]));
}
if ( compress.DecompressionStep() ) {
recv_buf=u_simd_recv_buf[0];
} else {
recv_buf=this->u_recv_buf_p;
}
send_buf = this->u_send_buf_p; // Gather locally, must send
////////////////////////////////////////////////////////
// Gather locally
////////////////////////////////////////////////////////
assert(send_buf!=NULL);
Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,comm_off,so);
cobj *recv_buf;
if ( compress.DecompressionStep() ) {
recv_buf=u_simd_recv_buf[0];
} else {
recv_buf=this->u_recv_buf_p;
}
int duplicate = CheckForDuplicate(dimension,sx,comm_proc,(void *)&recv_buf[comm_off],0,bytes,cbmask);
if ( (!duplicate) ) { // Force comms for now
cobj *send_buf;
send_buf = this->u_send_buf_p; // Gather locally, must send
///////////////////////////////////////////////////////////
// Build a list of things to do after we synchronise GPUs
// Start comms now???
///////////////////////////////////////////////////////////
AddPacket((void *)&send_buf[comm_off],
(void *)&recv_buf[comm_off],
xmit_to_rank, comms_send,
recv_from_rank, comms_recv,
bytes);
}
if ( compress.DecompressionStep() && comms_recv ) {
AddDecompress(&this->u_recv_buf_p[comm_off],
&recv_buf[comm_off],
////////////////////////////////////////////////////////
// Gather locally
////////////////////////////////////////////////////////
gathertime-=usecond();
assert(send_buf!=NULL);
Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,u_comm_offset,so); face_idx++;
gathertime+=usecond();
///////////////////////////////////////////////////////////
// Build a list of things to do after we synchronise GPUs
// Start comms now???
///////////////////////////////////////////////////////////
AddPacket((void *)&send_buf[u_comm_offset],
(void *)&recv_buf[u_comm_offset],
xmit_to_rank,
recv_from_rank,
bytes);
if ( compress.DecompressionStep() ) {
AddDecompress(&this->u_recv_buf_p[u_comm_offset],
&recv_buf[u_comm_offset],
words,Decompressions);
}
u_comm_offset+=words;
face_idx++;
}
}
return 0;
}
template<class compressor>
int GatherSimd(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor &compress,int & face_idx,int point)
int GatherSimd(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor &compress,int & face_idx)
{
const int Nsimd = _grid->Nsimd();
const int maxl =2;// max layout in a direction
int comms_send = this->_comms_send[point] ;
int comms_recv = this->_comms_recv[point] ;
int fd = _grid->_fdimensions[dimension];
int rd = _grid->_rdimensions[dimension];
int ld = _grid->_ldimensions[dimension];
@ -1161,6 +1103,7 @@ public:
int permute_type=_grid->PermuteType(dimension);
// std::cout << "SimdNew permute type "<<permute_type<<std::endl;
///////////////////////////////////////////////
// Simd direction uses an extract/merge pair
@ -1177,8 +1120,8 @@ public:
int bytes = (reduced_buffer_size*datum_bytes)/simd_layout;
assert(bytes*simd_layout == reduced_buffer_size*datum_bytes);
Vector<cobj *> rpointers(maxl);
Vector<cobj *> spointers(maxl);
std::vector<cobj *> rpointers(maxl);
std::vector<cobj *> spointers(maxl);
///////////////////////////////////////////
// Work out what to send where
@ -1194,9 +1137,8 @@ public:
if ( any_offnode ) {
int comm_off = u_comm_offset;
for(int i=0;i<maxl;i++){
spointers[i] = (cobj *) &u_simd_send_buf[i][comm_off];
spointers[i] = (cobj *) &u_simd_send_buf[i][u_comm_offset];
}
int sx = (x+sshift)%rd;
@ -1205,17 +1147,18 @@ public:
face_table.resize(face_idx+1);
std::vector<std::pair<int,int> > face_table_host ;
Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,comm_off,face_table_host);
Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,face_table_host);
face_table[face_idx].resize(face_table_host.size());
acceleratorCopyToDevice(&face_table_host[0],
&face_table[face_idx][0],
face_table[face_idx].size()*sizeof(face_table_host[0]));
}
gathermtime-=usecond();
if ( comms_send || comms_recv )
Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type);
Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type);
face_idx++;
gathermtime+=usecond();
//spointers[0] -- low
//spointers[1] -- high
@ -1232,8 +1175,8 @@ public:
int nbr_plane = nbr_ic;
assert (sx == nbr_ox);
auto rp = &u_simd_recv_buf[i ][comm_off];
auto sp = &u_simd_send_buf[nbr_plane][comm_off];
auto rp = &u_simd_recv_buf[i ][u_comm_offset];
auto sp = &u_simd_send_buf[nbr_plane][u_comm_offset];
if(nbr_proc){
@ -1244,13 +1187,8 @@ public:
rpointers[i] = rp;
int duplicate = CheckForDuplicate(dimension,sx,nbr_proc,(void *)rp,i,bytes,cbmask);
if ( !duplicate ) {
AddPacket((void *)sp,(void *)rp,
xmit_to_rank,comms_send,
recv_from_rank,comms_recv,
bytes);
}
AddPacket((void *)sp,(void *)rp,xmit_to_rank,recv_from_rank,bytes);
} else {
@ -1259,12 +1197,9 @@ public:
}
}
if ( comms_recv ) {
AddMerge(&this->u_recv_buf_p[comm_off],rpointers,reduced_buffer_size,permute_type,Mergers);
}
AddMerge(&this->u_recv_buf_p[u_comm_offset],rpointers,reduced_buffer_size,permute_type,Mergers);
u_comm_offset +=buffer_size;
}
}
return 0;

View File

@ -55,7 +55,7 @@ template<class vtype, int N> accelerator_inline iVector<vtype, N> Exponentiate(c
// Specialisation: Cayley-Hamilton exponential for SU(3)
#ifndef GRID_ACCELERATED
#ifndef GRID_CUDA
template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr>
accelerator_inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP )
{

View File

@ -208,46 +208,5 @@ void merge(vobj &vec,const ExtractPointerArray<sobj> &extracted, int offset)
}
//////////////////////////////////////////////////////////////////////////////////
//Copy a single lane of a SIMD tensor type from one object to another
//Output object must be of the same tensor type but may be of a different precision (i.e. it can have a different root data type)
///////////////////////////////////////////////////////////////////////////////////
template<class vobjOut, class vobjIn>
accelerator_inline
void copyLane(vobjOut & __restrict__ vecOut, int lane_out, const vobjIn & __restrict__ vecIn, int lane_in)
{
static_assert( std::is_same<typename vobjOut::DoublePrecision, typename vobjIn::DoublePrecision>::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same
typedef typename vobjOut::vector_type ovector_type;
typedef typename vobjIn::vector_type ivector_type;
constexpr int owords=sizeof(vobjOut)/sizeof(ovector_type);
constexpr int iwords=sizeof(vobjIn)/sizeof(ivector_type);
static_assert( owords == iwords, "copyLane: Expected number of vector words in input and output objects to be equal" );
typedef typename vobjOut::scalar_type oscalar_type;
typedef typename vobjIn::scalar_type iscalar_type;
typedef typename ExtractTypeMap<oscalar_type>::extract_type oextract_type;
typedef typename ExtractTypeMap<iscalar_type>::extract_type iextract_type;
typedef oextract_type * opointer;
typedef iextract_type * ipointer;
constexpr int oNsimd=ovector_type::Nsimd();
constexpr int iNsimd=ivector_type::Nsimd();
iscalar_type itmp;
oscalar_type otmp;
opointer __restrict__ op = (opointer)&vecOut;
ipointer __restrict__ ip = (ipointer)&vecIn;
for(int w=0;w<owords;w++){
memcpy( (char*)&itmp, (char*)(ip + lane_in + iNsimd*w), sizeof(iscalar_type) );
otmp = itmp; //potential precision change
memcpy( (char*)(op + lane_out + oNsimd*w), (char*)&otmp, sizeof(oscalar_type) );
}
}
NAMESPACE_END(Grid);

View File

@ -6,17 +6,9 @@ uint32_t accelerator_threads=2;
uint32_t acceleratorThreads(void) {return accelerator_threads;};
void acceleratorThreads(uint32_t t) {accelerator_threads = t;};
#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK"
#define ENV_RANK_OMPI "OMPI_COMM_WORLD_RANK"
#define ENV_LOCAL_RANK_SLURM "SLURM_LOCALID"
#define ENV_RANK_SLURM "SLURM_PROCID"
#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK"
#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK"
#ifdef GRID_CUDA
cudaDeviceProp *gpu_props;
cudaStream_t copyStream;
cudaStream_t cpuStream;
void acceleratorInit(void)
{
int nDevices = 1;
@ -25,6 +17,12 @@ void acceleratorInit(void)
char * localRankStr = NULL;
int rank = 0, world_rank=0;
#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK"
#define ENV_RANK_OMPI "OMPI_COMM_WORLD_RANK"
#define ENV_LOCAL_RANK_SLURM "SLURM_LOCALID"
#define ENV_RANK_SLURM "SLURM_PROCID"
#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK"
#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK"
if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);}
if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);}
if ((localRankStr = getenv(ENV_RANK_SLURM )) != NULL) { world_rank = atoi(localRankStr);}
@ -99,7 +97,6 @@ void acceleratorInit(void)
cudaSetDevice(device);
cudaStreamCreate(&copyStream);
cudaStreamCreate(&cpuStream);
const int len=64;
char busid[len];
if( rank == world_rank ) {
@ -114,7 +111,6 @@ void acceleratorInit(void)
#ifdef GRID_HIP
hipDeviceProp_t *gpu_props;
hipStream_t copyStream;
hipStream_t cpuStream;
void acceleratorInit(void)
{
int nDevices = 1;
@ -123,6 +119,10 @@ void acceleratorInit(void)
char * localRankStr = NULL;
int rank = 0, world_rank=0;
#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK"
#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK"
#define ENV_RANK_OMPI "OMPI_COMM_WORLD_RANK"
#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK"
// We extract the local rank initialization using an environment variable
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)
{
@ -134,10 +134,8 @@ void acceleratorInit(void)
}
if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);}
if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);}
if ((localRankStr = getenv(ENV_RANK_SLURM )) != NULL) { world_rank = atoi(localRankStr);}
if ( world_rank == 0 )
printf("world_rank %d has %d devices\n",world_rank,nDevices);
printf("world_rank %d has %d devices\n",world_rank,nDevices);
size_t totalDeviceMem=0;
for (int i = 0; i < nDevices; i++) {
@ -183,7 +181,6 @@ void acceleratorInit(void)
#endif
hipSetDevice(device);
hipStreamCreate(&copyStream);
hipStreamCreate(&cpuStream);
const int len=64;
char busid[len];
if( rank == world_rank ) {
@ -198,12 +195,15 @@ void acceleratorInit(void)
#ifdef GRID_SYCL
cl::sycl::queue *theGridAccelerator;
cl::sycl::queue *theCopyAccelerator;
void acceleratorInit(void)
{
int nDevices = 1;
cl::sycl::gpu_selector selector;
cl::sycl::device selectedDevice { selector };
theGridAccelerator = new sycl::queue (selectedDevice);
// theCopyAccelerator = new sycl::queue (selectedDevice);
theCopyAccelerator = theGridAccelerator; // Should proceed concurrenlty anyway.
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
zeInit(0);
@ -211,7 +211,10 @@ void acceleratorInit(void)
char * localRankStr = NULL;
int rank = 0, world_rank=0;
#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK"
#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK"
#define ENV_RANK_OMPI "OMPI_COMM_WORLD_RANK"
#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK"
// We extract the local rank initialization using an environment variable
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)
{

View File

@ -107,7 +107,6 @@ void acceleratorInit(void);
extern int acceleratorAbortOnGpuError;
extern cudaStream_t copyStream;
extern cudaStream_t cpuStream;
accelerator_inline int acceleratorSIMTlane(int Nsimd) {
#ifdef GRID_SIMT
@ -135,7 +134,7 @@ inline void cuda_mem(void)
}; \
dim3 cu_threads(nsimd,acceleratorThreads(),1); \
dim3 cu_blocks ((num1+nt-1)/nt,num2,1); \
LambdaApply<<<cu_blocks,cu_threads,0,cpuStream>>>(num1,num2,nsimd,lambda); \
LambdaApply<<<cu_blocks,cu_threads>>>(num1,num2,nsimd,lambda); \
}
#define accelerator_for6dNB(iter1, num1, \
@ -154,7 +153,7 @@ inline void cuda_mem(void)
}; \
dim3 cu_blocks (num1,num2,num3); \
dim3 cu_threads(num4,num5,num6); \
Lambda6Apply<<<cu_blocks,cu_threads,0,cpuStream>>>(num1,num2,num3,num4,num5,num6,lambda); \
Lambda6Apply<<<cu_blocks,cu_threads>>>(num1,num2,num3,num4,num5,num6,lambda); \
}
template<typename lambda> __global__
@ -190,7 +189,7 @@ void Lambda6Apply(uint64_t num1, uint64_t num2, uint64_t num3,
#define accelerator_barrier(dummy) \
{ \
cudaStreamSynchronize(cpuStream); \
cudaDeviceSynchronize(); \
cudaError err = cudaGetLastError(); \
if ( cudaSuccess != err ) { \
printf("accelerator_barrier(): Cuda error %s \n", \
@ -248,7 +247,6 @@ inline int acceleratorIsCommunicable(void *ptr)
//////////////////////////////////////////////
// SyCL acceleration
//////////////////////////////////////////////
#ifdef GRID_SYCL
NAMESPACE_END(Grid);
#include <CL/sycl.hpp>
@ -263,6 +261,7 @@ NAMESPACE_END(Grid);
NAMESPACE_BEGIN(Grid);
extern cl::sycl::queue *theGridAccelerator;
extern cl::sycl::queue *theCopyAccelerator;
#ifdef __SYCL_DEVICE_ONLY__
#define GRID_SIMT
@ -290,7 +289,7 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
cgh.parallel_for( \
cl::sycl::nd_range<3>(global,local), \
[=] (cl::sycl::nd_item<3> item) /*mutable*/ \
[[intel::reqd_sub_group_size(8)]] \
[[intel::reqd_sub_group_size(16)]] \
{ \
auto iter1 = item.get_global_id(0); \
auto iter2 = item.get_global_id(1); \
@ -299,19 +298,19 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
}); \
});
#define accelerator_barrier(dummy) theGridAccelerator->wait();
#define accelerator_barrier(dummy) { theGridAccelerator->wait(); }
inline void *acceleratorAllocShared(size_t bytes){ return malloc_shared(bytes,*theGridAccelerator);};
inline void *acceleratorAllocDevice(size_t bytes){ return malloc_device(bytes,*theGridAccelerator);};
inline void acceleratorFreeShared(void *ptr){free(ptr,*theGridAccelerator);};
inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) {
theGridAccelerator->memcpy(to,from,bytes);
}
inline void acceleratorCopySynchronise(void) { theGridAccelerator->wait(); std::cout<<"acceleratorCopySynchronise() wait "<<std::endl; }
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
inline void acceleratorMemSet(void *base,int value,size_t bytes) { theGridAccelerator->memset(base,value,bytes); theGridAccelerator->wait();}
inline void acceleratorCopySynchronise(void) { theCopyAccelerator->wait(); }
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes);}
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();}
inline void acceleratorMemSet(void *base,int value,size_t bytes) { theCopyAccelerator->memset(base,value,bytes); theCopyAccelerator->wait();}
inline int acceleratorIsCommunicable(void *ptr)
{
#if 0
@ -340,7 +339,6 @@ NAMESPACE_BEGIN(Grid);
#define accelerator_inline __host__ __device__ inline
extern hipStream_t copyStream;
extern hipStream_t cpuStream;
/*These routines define mapping from thread grid to loop & vector lane indexing */
accelerator_inline int acceleratorSIMTlane(int Nsimd) {
#ifdef GRID_SIMT
@ -362,12 +360,12 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
dim3 hip_blocks ((num1+nt-1)/nt,num2,1); \
if(hip_threads.x * hip_threads.y * hip_threads.z <= 64){ \
hipLaunchKernelGGL(LambdaApply64,hip_blocks,hip_threads, \
0,cpuStream, \
num1,num2,nsimd, lambda); \
0,0, \
num1,num2,nsimd, lambda); \
} else { \
hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads, \
0,cpuStream, \
num1,num2,nsimd, lambda); \
0,0, \
num1,num2,nsimd, lambda); \
} \
}
@ -400,7 +398,7 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda)
#define accelerator_barrier(dummy) \
{ \
hipStreamSynchronize(cpuStream); \
hipDeviceSynchronize(); \
auto err = hipGetLastError(); \
if ( err != hipSuccess ) { \
printf("After hipDeviceSynchronize() : HIP error %s \n", hipGetErrorString( err )); \
@ -443,7 +441,7 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(bas
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
{
hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);
hipMemcpyAsync(to,from,bytes, hipMemcpyDeviceToDevice,copyStream);
}
inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); };
@ -463,8 +461,6 @@ inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream);
accelerator_for2dNB(iter1, num1, iter2, num2, nsimd, { __VA_ARGS__ } ); \
accelerator_barrier(dummy);
#define GRID_ACCELERATED
#endif
//////////////////////////////////////////////
@ -515,7 +511,16 @@ inline void *acceleratorAllocCpu(size_t bytes){return memalign(GRID_ALLOC_ALIGN,
inline void acceleratorFreeCpu (void *ptr){free(ptr);};
#endif
//////////////////////////////////////////////
// Fencing needed ONLY for SYCL
//////////////////////////////////////////////
#ifdef GRID_SYCL
inline void acceleratorFenceComputeStream(void){ accelerator_barrier();};
#else
// Ordering within a stream guaranteed on Nvidia & AMD
inline void acceleratorFenceComputeStream(void){ };
#endif
///////////////////////////////////////////////////
// Synchronise across local threads for divergence resynch

View File

@ -27,6 +27,7 @@
/* END LEGAL */
extern "C" {
#include <openssl/sha.h>
#include <openssl/evp.h>
}
#ifdef USE_IPP
#include "ipp.h"
@ -70,10 +71,8 @@ public:
static inline std::vector<unsigned char> sha256(const void *data,size_t bytes)
{
std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH);
SHA256_CTX sha256;
SHA256_Init (&sha256);
SHA256_Update(&sha256, data,bytes);
SHA256_Final (&hash[0], &sha256);
auto digest = EVP_get_digestbyname("SHA256");
EVP_Digest(data, bytes, &hash[0], NULL, digest, NULL);
return hash;
}
static inline std::vector<int> sha256_seeds(const std::string &s)

View File

@ -1,918 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./HMC/Mobius2p1fIDSDRGparityEOFA.cc
Copyright (C) 2015-2016
Author: Christopher Kelly <ckelly@bnl.gov>
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
//Production binary for the 40ID G-parity ensemble
struct RatQuoParameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(RatQuoParameters,
double, bnd_lo,
double, bnd_hi,
Integer, action_degree,
double, action_tolerance,
Integer, md_degree,
double, md_tolerance,
Integer, reliable_update_freq,
Integer, bnd_check_freq);
RatQuoParameters() {
bnd_lo = 1e-2;
bnd_hi = 30;
action_degree = 10;
action_tolerance = 1e-10;
md_degree = 10;
md_tolerance = 1e-8;
bnd_check_freq = 20;
reliable_update_freq = 50;
}
void Export(RationalActionParams &into) const{
into.lo = bnd_lo;
into.hi = bnd_hi;
into.action_degree = action_degree;
into.action_tolerance = action_tolerance;
into.md_degree = md_degree;
into.md_tolerance = md_tolerance;
into.BoundsCheckFreq = bnd_check_freq;
}
};
struct EOFAparameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(EOFAparameters,
OneFlavourRationalParams, rat_params,
double, action_tolerance,
double, action_mixcg_inner_tolerance,
double, md_tolerance,
double, md_mixcg_inner_tolerance);
EOFAparameters() {
action_mixcg_inner_tolerance = 1e-8;
action_tolerance = 1e-10;
md_tolerance = 1e-8;
md_mixcg_inner_tolerance = 1e-8;
rat_params.lo = 1.0;
rat_params.hi = 25.0;
rat_params.MaxIter = 50000;
rat_params.tolerance= 1.0e-9;
rat_params.degree = 14;
rat_params.precision= 50;
}
};
struct EvolParameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(EvolParameters,
Integer, StartTrajectory,
Integer, Trajectories,
Integer, SaveInterval,
Integer, Steps,
RealD, TrajectoryLength,
bool, MetropolisTest,
std::string, StartingType,
std::vector<Integer>, GparityDirs,
std::vector<EOFAparameters>, eofa_l,
RatQuoParameters, rat_quo_s,
RatQuoParameters, rat_quo_DSDR);
EvolParameters() {
//For initial thermalization; afterwards user should switch Metropolis on and use StartingType=CheckpointStart
MetropolisTest = false;
StartTrajectory = 0;
Trajectories = 50;
SaveInterval = 5;
StartingType = "ColdStart";
GparityDirs.resize(3, 1); //1 for G-parity, 0 for periodic
Steps = 5;
TrajectoryLength = 1.0;
}
};
bool fileExists(const std::string &fn){
std::ifstream f(fn);
return f.good();
}
struct LanczosParameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
double, alpha,
double, beta,
double, mu,
int, ord,
int, n_stop,
int, n_want,
int, n_use,
double, tolerance);
LanczosParameters() {
alpha = 35;
beta = 5;
mu = 0;
ord = 100;
n_stop = 10;
n_want = 10;
n_use = 15;
tolerance = 1e-6;
}
};
template<typename FermionActionD, typename FermionFieldD>
void computeEigenvalues(std::string param_file,
GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt, //expect lattice to have been initialized to something
FermionActionD &action, GridParallelRNG &rng){
LanczosParameters params;
if(fileExists(param_file)){
std::cout << GridLogMessage << " Reading " << param_file << std::endl;
Grid::XmlReader rd(param_file);
read(rd, "LanczosParameters", params);
}else if(!GlobalSharedMemory::WorldRank){
std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
Grid::XmlWriter wr(param_file + ".templ");
write(wr, "LanczosParameters", params);
}
FermionFieldD gauss_o(rbGrid);
FermionFieldD gauss(Grid);
gaussian(rng, gauss);
pickCheckerboard(Odd, gauss_o, gauss);
action.ImportGauge(latt);
SchurDiagMooeeOperator<FermionActionD, FermionFieldD> hermop(action);
PlainHermOp<FermionFieldD> hermop_wrap(hermop);
//ChebyshevLanczos<FermionFieldD> Cheb(params.alpha, params.beta, params.mu, params.ord);
assert(params.mu == 0.0);
Chebyshev<FermionFieldD> Cheb(params.beta*params.beta, params.alpha*params.alpha, params.ord+1);
FunctionHermOp<FermionFieldD> Cheb_wrap(Cheb, hermop);
std::cout << "IRL: alpha=" << params.alpha << " beta=" << params.beta << " mu=" << params.mu << " ord=" << params.ord << std::endl;
ImplicitlyRestartedLanczos<FermionFieldD> IRL(Cheb_wrap, hermop_wrap, params.n_stop, params.n_want, params.n_use, params.tolerance, 50000);
std::vector<RealD> eval(params.n_use);
std::vector<FermionFieldD> evec(params.n_use, rbGrid);
int Nconv;
IRL.calc(eval, evec, gauss_o, Nconv);
std::cout << "Eigenvalues:" << std::endl;
for(int i=0;i<params.n_want;i++){
std::cout << i << " " << eval[i] << std::endl;
}
}
//Check the quality of the RHMC approx
//action_or_md toggles checking the action (0), MD (1) or both (2) setups
template<typename FermionActionD, typename FermionFieldD, typename RHMCtype>
void checkRHMC(GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt, //expect lattice to have been initialized to something
FermionActionD &numOp, FermionActionD &denOp, RHMCtype &rhmc, GridParallelRNG &rng,
int inv_pow, const std::string &quark_descr, int action_or_md){
assert(action_or_md == 0 || action_or_md == 1 || action_or_md == 2);
FermionFieldD gauss_o(rbGrid);
FermionFieldD gauss(Grid);
gaussian(rng, gauss);
pickCheckerboard(Odd, gauss_o, gauss);
numOp.ImportGauge(latt);
denOp.ImportGauge(latt);
typedef typename FermionActionD::Impl_t FermionImplPolicyD;
SchurDifferentiableOperator<FermionImplPolicyD> MdagM(numOp);
SchurDifferentiableOperator<FermionImplPolicyD> VdagV(denOp);
PowerMethod<FermionFieldD> power_method;
RealD lambda_max;
std::cout << "Starting: Get RHMC high bound approx for " << quark_descr << " numerator" << std::endl;
lambda_max = power_method(MdagM,gauss_o);
std::cout << GridLogMessage << "Got lambda_max "<<lambda_max<<std::endl;
std::cout << "Starting: Get RHMC high bound approx for " << quark_descr << " denominator" << std::endl;
lambda_max = power_method(VdagV,gauss_o);
std::cout << GridLogMessage << "Got lambda_max "<<lambda_max<<std::endl;
if(action_or_md == 0 || action_or_md == 2){
std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
InversePowerBoundsCheck(inv_pow, 50000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerAction); //use large tolerance to prevent exit on fail; we are trying to tune here!
std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
InversePowerBoundsCheck(2*inv_pow, 50000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerAction);
std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
InversePowerBoundsCheck(inv_pow, 50000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerAction);
std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
InversePowerBoundsCheck(2*inv_pow, 50000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerAction);
std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
}
std::cout << "-------------------------------------------------------------------------------" << std::endl;
if(action_or_md == 1 || action_or_md == 2){
std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
InversePowerBoundsCheck(inv_pow, 50000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerMD);
std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
InversePowerBoundsCheck(2*inv_pow, 50000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerMD);
std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
InversePowerBoundsCheck(inv_pow, 50000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerMD);
std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
InversePowerBoundsCheck(2*inv_pow, 50000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerMD);
std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
}
}
template<typename FermionImplPolicy>
void checkEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
GridCartesian* FGrid, GridParallelRNG &rng, const LatticeGaugeFieldD &latt){
std::cout << GridLogMessage << "Starting EOFA action/bounds check" << std::endl;
typename FermionImplPolicy::FermionField eta(FGrid);
RealD scale = std::sqrt(0.5);
gaussian(rng,eta); eta = eta * scale;
//Use the inbuilt check
EOFA.refresh(latt, eta);
EOFA.S(latt);
std::cout << GridLogMessage << "Finished EOFA upper action/bounds check" << std::endl;
}
template<typename FermionImplPolicy>
class EOFAlinop: public LinearOperatorBase<typename FermionImplPolicy::FermionField>{
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA;
LatticeGaugeFieldD &U;
public:
EOFAlinop(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){}
typedef typename FermionImplPolicy::FermionField Field;
void OpDiag (const Field &in, Field &out){ assert(0); }
void OpDir (const Field &in, Field &out,int dir,int disp){ assert(0); }
void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); }
void Op (const Field &in, Field &out){ assert(0); }
void AdjOp (const Field &in, Field &out){ assert(0); }
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
void HermOp(const Field &in, Field &out){ EOFA.Meofa(U, in, out); }
};
template<typename FermionImplPolicy>
void upperBoundEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){
std::cout << GridLogMessage << "Starting EOFA upper bound compute" << std::endl;
EOFAlinop<FermionImplPolicy> linop(EOFA, latt);
typename FermionImplPolicy::FermionField eta(FGrid);
gaussian(rng,eta);
PowerMethod<typename FermionImplPolicy::FermionField> power_method;
auto lambda_max = power_method(linop,eta);
std::cout << GridLogMessage << "Upper bound of EOFA operator " << lambda_max << std::endl;
}
//Applications of M^{-1} cost the same as M for EOFA!
template<typename FermionImplPolicy>
class EOFAinvLinop: public LinearOperatorBase<typename FermionImplPolicy::FermionField>{
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA;
LatticeGaugeFieldD &U;
public:
EOFAinvLinop(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){}
typedef typename FermionImplPolicy::FermionField Field;
void OpDiag (const Field &in, Field &out){ assert(0); }
void OpDir (const Field &in, Field &out,int dir,int disp){ assert(0); }
void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); }
void Op (const Field &in, Field &out){ assert(0); }
void AdjOp (const Field &in, Field &out){ assert(0); }
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
void HermOp(const Field &in, Field &out){ EOFA.MeofaInv(U, in, out); }
};
template<typename FermionImplPolicy>
void lowerBoundEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){
std::cout << GridLogMessage << "Starting EOFA lower bound compute using power method on M^{-1}. Inverse of highest eigenvalue is the lowest eigenvalue of M" << std::endl;
EOFAinvLinop<FermionImplPolicy> linop(EOFA, latt);
typename FermionImplPolicy::FermionField eta(FGrid);
gaussian(rng,eta);
PowerMethod<typename FermionImplPolicy::FermionField> power_method;
auto lambda_max = power_method(linop,eta);
std::cout << GridLogMessage << "Lower bound of EOFA operator " << 1./lambda_max << std::endl;
}
NAMESPACE_BEGIN(Grid);
template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class SchurOperatorF>
class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
public:
typedef typename FermionOperatorD::FermionField FieldD;
typedef typename FermionOperatorF::FermionField FieldF;
using OperatorFunction<FieldD>::operator();
RealD Tolerance;
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
Integer MaxInnerIterations;
Integer MaxOuterIterations;
GridBase* SinglePrecGrid4; //Grid for single-precision fields
GridBase* SinglePrecGrid5; //Grid for single-precision fields
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
FermionOperatorF &FermOpF;
FermionOperatorD &FermOpD;;
SchurOperatorF &LinOpF;
SchurOperatorD &LinOpD;
Integer TotalInnerIterations; //Number of inner CG iterations
Integer TotalOuterIterations; //Number of restarts
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
MixedPrecisionConjugateGradientOperatorFunction(RealD tol,
Integer maxinnerit,
Integer maxouterit,
GridBase* _sp_grid4,
GridBase* _sp_grid5,
FermionOperatorF &_FermOpF,
FermionOperatorD &_FermOpD,
SchurOperatorF &_LinOpF,
SchurOperatorD &_LinOpD):
LinOpF(_LinOpF),
LinOpD(_LinOpD),
FermOpF(_FermOpF),
FermOpD(_FermOpD),
Tolerance(tol),
InnerTolerance(tol),
MaxInnerIterations(maxinnerit),
MaxOuterIterations(maxouterit),
SinglePrecGrid4(_sp_grid4),
SinglePrecGrid5(_sp_grid5),
OuterLoopNormMult(100.)
{
};
void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl;
SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
precisionChange(FermOpF.Umu, FermOpD.Umu);
pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
////////////////////////////////////////////////////////////////////////////////////
// Make a mixed precision conjugate gradient
////////////////////////////////////////////////////////////////////////////////////
MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
MPCG.InnerTolerance = InnerTolerance;
std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
MPCG(src,psi);
}
};
template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class SchurOperatorF>
class MixedPrecisionReliableUpdateConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
public:
typedef typename FermionOperatorD::FermionField FieldD;
typedef typename FermionOperatorF::FermionField FieldF;
using OperatorFunction<FieldD>::operator();
RealD Tolerance;
Integer MaxIterations;
RealD Delta; //reliable update parameter
GridBase* SinglePrecGrid4; //Grid for single-precision fields
GridBase* SinglePrecGrid5; //Grid for single-precision fields
FermionOperatorF &FermOpF;
FermionOperatorD &FermOpD;;
SchurOperatorF &LinOpF;
SchurOperatorD &LinOpD;
MixedPrecisionReliableUpdateConjugateGradientOperatorFunction(RealD tol,
RealD delta,
Integer maxit,
GridBase* _sp_grid4,
GridBase* _sp_grid5,
FermionOperatorF &_FermOpF,
FermionOperatorD &_FermOpD,
SchurOperatorF &_LinOpF,
SchurOperatorD &_LinOpD):
LinOpF(_LinOpF),
LinOpD(_LinOpD),
FermOpF(_FermOpF),
FermOpD(_FermOpD),
Tolerance(tol),
Delta(delta),
MaxIterations(maxit),
SinglePrecGrid4(_sp_grid4),
SinglePrecGrid5(_sp_grid5)
{
};
void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
std::cout << GridLogMessage << " Mixed precision reliable CG update wrapper operator() "<<std::endl;
SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
precisionChange(FermOpF.Umu, FermOpD.Umu);
pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
////////////////////////////////////////////////////////////////////////////////////
// Make a mixed precision conjugate gradient
////////////////////////////////////////////////////////////////////////////////////
ConjugateGradientReliableUpdate<FieldD,FieldF> MPCG(Tolerance,MaxIterations,Delta,SinglePrecGrid5,LinOpF,LinOpD);
std::cout << GridLogMessage << "Calling mixed precision reliable update Conjugate Gradient" <<std::endl;
MPCG(src,psi);
}
};
NAMESPACE_END(Grid);
int main(int argc, char **argv) {
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
// here make a routine to print all the relevant information on the run
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
std::string param_file = "params.xml";
bool file_load_check = false;
std::string serial_seeds = "1 2 3 4 5";
std::string parallel_seeds = "6 7 8 9 10";
int i=1;
while(i < argc){
std::string sarg(argv[i]);
if(sarg == "--param_file"){
assert(i!=argc-1);
param_file = argv[i+1];
i+=2;
}else if(sarg == "--read_check"){ //check the fields load correctly and pass checksum/plaquette repro
file_load_check = true;
i++;
}else if(sarg == "--set_seeds"){ //set the rng seeds. Expects two vector args, e.g. --set_seeds 1.2.3.4 5.6.7.8
assert(i < argc-2);
std::vector<int> tmp;
GridCmdOptionIntVector(argv[i+1],tmp);
{
std::stringstream ss;
for(int j=0;j<tmp.size()-1;j++) ss << tmp[j] << " ";
ss << tmp.back();
serial_seeds = ss.str();
}
GridCmdOptionIntVector(argv[i+2],tmp);
{
std::stringstream ss;
for(int j=0;j<tmp.size()-1;j++) ss << tmp[j] << " ";
ss << tmp.back();
parallel_seeds = ss.str();
}
i+=3;
std::cout << GridLogMessage << "Set serial seeds to " << serial_seeds << std::endl;
std::cout << GridLogMessage << "Set parallel seeds to " << parallel_seeds << std::endl;
}else{
i++;
}
}
//Read the user parameters
EvolParameters user_params;
if(fileExists(param_file)){
std::cout << GridLogMessage << " Reading " << param_file << std::endl;
Grid::XmlReader rd(param_file);
read(rd, "Params", user_params);
}else if(!GlobalSharedMemory::WorldRank){
std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
{
Grid::XmlWriter wr(param_file + ".templ");
write(wr, "Params", user_params);
}
std::cout << GridLogMessage << " Done" << std::endl;
Grid_finalize();
return 0;
}
//Check the parameters
if(user_params.GparityDirs.size() != Nd-1){
std::cerr << "Error in input parameters: expect GparityDirs to have size = " << Nd-1 << std::endl;
exit(1);
}
for(int i=0;i<Nd-1;i++)
if(user_params.GparityDirs[i] != 0 && user_params.GparityDirs[i] != 1){
std::cerr << "Error in input parameters: expect GparityDirs values to be 0 (periodic) or 1 (G-parity)" << std::endl;
exit(1);
}
typedef GparityMobiusEOFAFermionD EOFAactionD;
typedef GparityMobiusFermionD FermionActionD;
typedef typename FermionActionD::Impl_t FermionImplPolicyD;
typedef typename FermionActionD::FermionField FermionFieldD;
typedef GparityMobiusEOFAFermionF EOFAactionF;
typedef GparityMobiusFermionF FermionActionF;
typedef typename FermionActionF::Impl_t FermionImplPolicyF;
typedef typename FermionActionF::FermionField FermionFieldF;
typedef GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicyD,FermionImplPolicyF> MixedPrecRHMC;
typedef GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicyD> DoublePrecRHMC;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
IntegratorParameters MD;
typedef ConjugateHMCRunnerD<MinimumNorm2> HMCWrapper; //NB: This is the "Omelyan integrator"
MD.name = std::string("MinimumNorm2");
// typedef ConjugateHMCRunnerD<ForceGradient> HMCWrapper;
// MD.name = std::string("ForceGradient");
MD.MDsteps = user_params.Steps;
MD.trajL = user_params.TrajectoryLength;
typedef HMCWrapper::ImplPolicy GaugeImplPolicy;
HMCparameters HMCparams;
HMCparams.StartTrajectory = user_params.StartTrajectory;
HMCparams.Trajectories = user_params.Trajectories;
HMCparams.NoMetropolisUntil= 0;
HMCparams.StartingType = user_params.StartingType;
HMCparams.MetropolisTest = user_params.MetropolisTest;
HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams);
// Grid from the command line arguments --grid and --mpi
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_lat";
CPparams.rng_prefix = "ckpoint_rng";
CPparams.saveInterval = user_params.SaveInterval;
CPparams.format = "IEEE64BIG";
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
//Note that checkpointing saves the RNG state so that this initialization is required only for the very first configuration
RNGModuleParameters RNGpar;
RNGpar.serial_seeds = serial_seeds;
RNGpar.parallel_seeds = parallel_seeds;
TheHMC.Resources.SetRNGSeeds(RNGpar);
typedef PlaquetteMod<GaugeImplPolicy> PlaqObs;
TheHMC.Resources.AddObservable<PlaqObs>();
//////////////////////////////////////////////
//aiming for ainv=1.723 GeV
// me bob
//Estimated a(ml+mres) [40ID] = 0.001305 0.00131
// a(mh+mres) [40ID] = 0.035910 0.03529
//Estimate Ls=12, b+c=2 mres~0.0011
//1/24/2022 initial mres measurement gives mres=0.001, adjusted light quark mass to 0.0003 from 0.0001
const int Ls = 12;
Real beta = 1.848;
Real light_mass = 0.0003;
Real strange_mass = 0.0342;
Real pv_mass = 1.0;
RealD M5 = 1.8;
RealD mobius_scale = 2.; //b+c
RealD mob_bmc = 1.0;
RealD mob_b = (mobius_scale + mob_bmc)/2.;
RealD mob_c = (mobius_scale - mob_bmc)/2.;
std::cout << GridLogMessage
<< "Ensemble parameters:" << std::endl
<< "Ls=" << Ls << std::endl
<< "beta=" << beta << std::endl
<< "light_mass=" << light_mass << std::endl
<< "strange_mass=" << strange_mass << std::endl
<< "mobius_scale=" << mobius_scale << std::endl;
//Setup the Grids
auto UGridD = TheHMC.Resources.GetCartesian();
auto UrbGridD = TheHMC.Resources.GetRBCartesian();
auto FGridD = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD);
auto FrbGridD = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD);
GridCartesian* UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian* UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF);
auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF);
ConjugateIwasakiGaugeActionD GaugeAction(beta);
// temporarily need a gauge field
LatticeGaugeFieldD Ud(UGridD);
LatticeGaugeFieldF Uf(UGridF);
//Setup the BCs
FermionActionD::ImplParams Params;
for(int i=0;i<Nd-1;i++) Params.twists[i] = user_params.GparityDirs[i]; //G-parity directions
Params.twists[Nd-1] = 1; //APBC in time direction
std::vector<int> dirs4(Nd);
for(int i=0;i<Nd-1;i++) dirs4[i] = user_params.GparityDirs[i];
dirs4[Nd-1] = 0; //periodic gauge BC in time
GaugeImplPolicy::setDirections(dirs4); //gauge BC
//Run optional gauge field checksum checker and exit
if(file_load_check){
TheHMC.initializeGaugeFieldAndRNGs(Ud);
std::cout << GridLogMessage << " Done" << std::endl;
Grid_finalize();
return 0;
}
////////////////////////////////////
// Collect actions
////////////////////////////////////
ActionLevel<HMCWrapper::Field> Level1(1); //light quark + strange quark
ActionLevel<HMCWrapper::Field> Level2(4); //DSDR
ActionLevel<HMCWrapper::Field> Level3(2); //gauge
/////////////////////////////////////////////////////////////
// Light EOFA action
// have to be careful with the parameters, cf. Test_dwf_gpforce_eofa.cc
/////////////////////////////////////////////////////////////
typedef SchurDiagMooeeOperator<EOFAactionD,FermionFieldD> EOFAschuropD;
typedef SchurDiagMooeeOperator<EOFAactionF,FermionFieldF> EOFAschuropF;
typedef ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction<FermionImplPolicyD, FermionImplPolicyF> EOFAmixPrecPFaction;
typedef MixedPrecisionConjugateGradientOperatorFunction<EOFAactionD, EOFAactionF, EOFAschuropD, EOFAschuropF> EOFA_mxCG;
typedef MixedPrecisionReliableUpdateConjugateGradientOperatorFunction<EOFAactionD, EOFAactionF, EOFAschuropD, EOFAschuropF> EOFA_relupCG;
std::vector<RealD> eofa_light_masses = { light_mass , 0.004, 0.016, 0.064, 0.256 };
std::vector<RealD> eofa_pv_masses = { 0.004 , 0.016, 0.064, 0.256, 1.0 };
int n_light_hsb = 5;
assert(user_params.eofa_l.size() == n_light_hsb);
EOFAmixPrecPFaction* EOFA_pfactions[n_light_hsb];
for(int i=0;i<n_light_hsb;i++){
RealD iml = eofa_light_masses[i];
RealD ipv = eofa_pv_masses[i];
EOFAactionD* LopD = new EOFAactionD(Ud, *FGridD, *FrbGridD, *UGridD, *UrbGridD, iml, iml, ipv, 0.0, -1, M5, mob_b, mob_c, Params);
EOFAactionF* LopF = new EOFAactionF(Uf, *FGridF, *FrbGridF, *UGridF, *UrbGridF, iml, iml, ipv, 0.0, -1, M5, mob_b, mob_c, Params);
EOFAactionD* RopD = new EOFAactionD(Ud, *FGridD, *FrbGridD, *UGridD, *UrbGridD, ipv, iml, ipv, -1.0, 1, M5, mob_b, mob_c, Params);
EOFAactionF* RopF = new EOFAactionF(Uf, *FGridF, *FrbGridF, *UGridF, *UrbGridF, ipv, iml, ipv, -1.0, 1, M5, mob_b, mob_c, Params);
EOFAschuropD* linopL_D = new EOFAschuropD(*LopD);
EOFAschuropD* linopR_D = new EOFAschuropD(*RopD);
EOFAschuropF* linopL_F = new EOFAschuropF(*LopF);
EOFAschuropF* linopR_F = new EOFAschuropF(*RopF);
#if 1
//Note reusing user_params.eofa_l.action(|md)_mixcg_inner_tolerance as Delta for now
EOFA_relupCG* ActionMCG_L = new EOFA_relupCG(user_params.eofa_l[i].action_tolerance, user_params.eofa_l[i].action_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
EOFA_relupCG* ActionMCG_R = new EOFA_relupCG(user_params.eofa_l[i].action_tolerance, user_params.eofa_l[i].action_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
EOFA_relupCG* DerivMCG_L = new EOFA_relupCG(user_params.eofa_l[i].md_tolerance, user_params.eofa_l[i].md_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
EOFA_relupCG* DerivMCG_R = new EOFA_relupCG(user_params.eofa_l[i].md_tolerance, user_params.eofa_l[i].md_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
#else
EOFA_mxCG* ActionMCG_L = new EOFA_mxCG(user_params.eofa_l[i].action_tolerance, 50000, 1000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
ActionMCG_L->InnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance;
EOFA_mxCG* ActionMCG_R = new EOFA_mxCG(user_params.eofa_l[i].action_tolerance, 50000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
ActionMCG_R->InnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance;
EOFA_mxCG* DerivMCG_L = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 50000, 1000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
DerivMCG_L->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance;
EOFA_mxCG* DerivMCG_R = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 50000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
DerivMCG_R->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance;
std::cout << GridLogMessage << "Set EOFA action solver action tolerance outer=" << ActionMCG_L->Tolerance << " inner=" << ActionMCG_L->InnerTolerance << std::endl;
std::cout << GridLogMessage << "Set EOFA MD solver tolerance outer=" << DerivMCG_L->Tolerance << " inner=" << DerivMCG_L->InnerTolerance << std::endl;
#endif
EOFAmixPrecPFaction* EOFA = new EOFAmixPrecPFaction(*LopF, *RopF,
*LopD, *RopD,
*ActionMCG_L, *ActionMCG_R,
*ActionMCG_L, *ActionMCG_R,
*DerivMCG_L, *DerivMCG_R,
user_params.eofa_l[i].rat_params, true);
EOFA_pfactions[i] = EOFA;
Level1.push_back(EOFA);
}
////////////////////////////////////
// Strange action
////////////////////////////////////
FermionActionD Numerator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD,strange_mass,M5,mob_b,mob_c,Params);
FermionActionD Denominator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD, pv_mass,M5,mob_b,mob_c,Params);
FermionActionF Numerator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF,strange_mass,M5,mob_b,mob_c,Params);
FermionActionF Denominator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF, pv_mass,M5,mob_b,mob_c,Params);
RationalActionParams rat_act_params_s;
rat_act_params_s.inv_pow = 4; // (M^dag M)^{1/4}
rat_act_params_s.precision= 60;
rat_act_params_s.MaxIter = 50000;
user_params.rat_quo_s.Export(rat_act_params_s);
std::cout << GridLogMessage << " Heavy quark bounds check every " << rat_act_params_s.BoundsCheckFreq << " trajectories (avg)" << std::endl;
//MixedPrecRHMC Quotient_s(Denominator_sD, Numerator_sD, Denominator_sF, Numerator_sF, rat_act_params_s, user_params.rat_quo_s.reliable_update_freq);
DoublePrecRHMC Quotient_s(Denominator_sD, Numerator_sD, rat_act_params_s);
Level1.push_back(&Quotient_s);
///////////////////////////////////
// DSDR action
///////////////////////////////////
RealD dsdr_mass=-1.8;
//Use same DSDR twists as https://arxiv.org/pdf/1208.4412.pdf
RealD dsdr_epsilon_f = 0.02; //numerator (in determinant)
RealD dsdr_epsilon_b = 0.5;
GparityWilsonTMFermionD Numerator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_f, Params);
GparityWilsonTMFermionF Numerator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_f, Params);
GparityWilsonTMFermionD Denominator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_b, Params);
GparityWilsonTMFermionF Denominator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_b, Params);
RationalActionParams rat_act_params_DSDR;
rat_act_params_DSDR.inv_pow = 2; // (M^dag M)^{1/2}
rat_act_params_DSDR.precision= 60;
rat_act_params_DSDR.MaxIter = 50000;
user_params.rat_quo_DSDR.Export(rat_act_params_DSDR);
std::cout << GridLogMessage << "DSDR quark bounds check every " << rat_act_params_DSDR.BoundsCheckFreq << " trajectories (avg)" << std::endl;
DoublePrecRHMC Quotient_DSDR(Denominator_DSDR_D, Numerator_DSDR_D, rat_act_params_DSDR);
Level2.push_back(&Quotient_DSDR);
/////////////////////////////////////////////////////////////
// Gauge action
/////////////////////////////////////////////////////////////
Level3.push_back(&GaugeAction);
TheHMC.TheAction.push_back(Level1);
TheHMC.TheAction.push_back(Level2);
TheHMC.TheAction.push_back(Level3);
std::cout << GridLogMessage << " Action complete "<< std::endl;
//Action tuning
bool
tune_rhmc_s=false, eigenrange_s=false,
tune_rhmc_DSDR=false, eigenrange_DSDR=false,
check_eofa=false,
upper_bound_eofa=false, lower_bound_eofa(false);
std::string lanc_params_s;
std::string lanc_params_DSDR;
int tune_rhmc_s_action_or_md;
int tune_rhmc_DSDR_action_or_md;
int eofa_which_hsb;
for(int i=1;i<argc;i++){
std::string sarg(argv[i]);
if(sarg == "--tune_rhmc_s"){
assert(i < argc-1);
tune_rhmc_s=true;
tune_rhmc_s_action_or_md = std::stoi(argv[i+1]);
}
else if(sarg == "--eigenrange_s"){
assert(i < argc-1);
eigenrange_s=true;
lanc_params_s = argv[i+1];
}
else if(sarg == "--tune_rhmc_DSDR"){
assert(i < argc-1);
tune_rhmc_DSDR=true;
tune_rhmc_DSDR_action_or_md = std::stoi(argv[i+1]);
}
else if(sarg == "--eigenrange_DSDR"){
assert(i < argc-1);
eigenrange_DSDR=true;
lanc_params_DSDR = argv[i+1];
}
else if(sarg == "--check_eofa"){
assert(i < argc-1);
check_eofa = true;
eofa_which_hsb = std::stoi(argv[i+1]); //-1 indicates all hasenbusch
assert(eofa_which_hsb == -1 || (eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb) );
}
else if(sarg == "--upper_bound_eofa"){
assert(i < argc-1);
upper_bound_eofa = true;
eofa_which_hsb = std::stoi(argv[i+1]);
assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb);
}
else if(sarg == "--lower_bound_eofa"){
assert(i < argc-1);
lower_bound_eofa = true;
eofa_which_hsb = std::stoi(argv[i+1]);
assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb);
}
}
if(tune_rhmc_s || eigenrange_s || tune_rhmc_DSDR || eigenrange_DSDR ||check_eofa || upper_bound_eofa || lower_bound_eofa) {
std::cout << GridLogMessage << "Running checks" << std::endl;
TheHMC.initializeGaugeFieldAndRNGs(Ud);
//std::cout << GridLogMessage << "EOFA action solver action tolerance outer=" << ActionMCG_L.Tolerance << " inner=" << ActionMCG_L.InnerTolerance << std::endl;
//std::cout << GridLogMessage << "EOFA MD solver tolerance outer=" << DerivMCG_L.Tolerance << " inner=" << DerivMCG_L.InnerTolerance << std::endl;
if(check_eofa){
if(eofa_which_hsb >= 0){
std::cout << GridLogMessage << "Starting checking EOFA Hasenbusch " << eofa_which_hsb << std::endl;
checkEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
std::cout << GridLogMessage << "Finished checking EOFA Hasenbusch " << eofa_which_hsb << std::endl;
}else{
for(int i=0;i<n_light_hsb;i++){
std::cout << GridLogMessage << "Starting checking EOFA Hasenbusch " << i << std::endl;
checkEOFA(*EOFA_pfactions[i], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
std::cout << GridLogMessage << "Finished checking EOFA Hasenbusch " << i << std::endl;
}
}
}
if(upper_bound_eofa) upperBoundEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
if(lower_bound_eofa) lowerBoundEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
if(eigenrange_s) computeEigenvalues<FermionActionD, FermionFieldD>(lanc_params_s, FGridD, FrbGridD, Ud, Numerator_sD, TheHMC.Resources.GetParallelRNG());
if(tune_rhmc_s) checkRHMC<FermionActionD, FermionFieldD, decltype(Quotient_s)>(FGridD, FrbGridD, Ud, Numerator_sD, Denominator_sD, Quotient_s, TheHMC.Resources.GetParallelRNG(), 4, "strange", tune_rhmc_s_action_or_md);
if(eigenrange_DSDR) computeEigenvalues<GparityWilsonTMFermionD, GparityWilsonTMFermionD::FermionField>(lanc_params_DSDR, UGridD, UrbGridD, Ud, Numerator_DSDR_D, TheHMC.Resources.GetParallelRNG());
if(tune_rhmc_DSDR) checkRHMC<GparityWilsonTMFermionD, GparityWilsonTMFermionD::FermionField, decltype(Quotient_DSDR)>(UGridD, UrbGridD, Ud, Numerator_DSDR_D, Denominator_DSDR_D, Quotient_DSDR, TheHMC.Resources.GetParallelRNG(), 2, "DSDR", tune_rhmc_DSDR_action_or_md);
std::cout << GridLogMessage << " Done" << std::endl;
Grid_finalize();
return 0;
}
//Run the HMC
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
TheHMC.Run();
std::cout << GridLogMessage << " Done" << std::endl;
Grid_finalize();
return 0;
} // main

View File

@ -1,873 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./HMC/Mobius2p1fIDSDRGparityEOFA.cc
Copyright (C) 2015-2016
Author: Christopher Kelly <ckelly@bnl.gov>
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
//Production binary for the 40ID G-parity ensemble
struct RatQuoParameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(RatQuoParameters,
double, bnd_lo,
double, bnd_hi,
Integer, action_degree,
double, action_tolerance,
Integer, md_degree,
double, md_tolerance,
Integer, reliable_update_freq,
Integer, bnd_check_freq);
RatQuoParameters() {
bnd_lo = 1e-2;
bnd_hi = 30;
action_degree = 10;
action_tolerance = 1e-10;
md_degree = 10;
md_tolerance = 1e-8;
bnd_check_freq = 20;
reliable_update_freq = 50;
}
void Export(RationalActionParams &into) const{
into.lo = bnd_lo;
into.hi = bnd_hi;
into.action_degree = action_degree;
into.action_tolerance = action_tolerance;
into.md_degree = md_degree;
into.md_tolerance = md_tolerance;
into.BoundsCheckFreq = bnd_check_freq;
}
};
struct EOFAparameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(EOFAparameters,
OneFlavourRationalParams, rat_params,
double, action_tolerance,
double, action_mixcg_inner_tolerance,
double, md_tolerance,
double, md_mixcg_inner_tolerance);
EOFAparameters() {
action_mixcg_inner_tolerance = 1e-8;
action_tolerance = 1e-10;
md_tolerance = 1e-8;
md_mixcg_inner_tolerance = 1e-8;
rat_params.lo = 1.0;
rat_params.hi = 25.0;
rat_params.MaxIter = 10000;
rat_params.tolerance= 1.0e-9;
rat_params.degree = 14;
rat_params.precision= 50;
}
};
struct EvolParameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(EvolParameters,
Integer, StartTrajectory,
Integer, Trajectories,
Integer, SaveInterval,
Integer, Steps,
RealD, TrajectoryLength,
bool, MetropolisTest,
std::string, StartingType,
std::vector<Integer>, GparityDirs,
std::vector<EOFAparameters>, eofa_l,
RatQuoParameters, rat_quo_s,
RatQuoParameters, rat_quo_DSDR);
EvolParameters() {
//For initial thermalization; afterwards user should switch Metropolis on and use StartingType=CheckpointStart
MetropolisTest = false;
StartTrajectory = 0;
Trajectories = 50;
SaveInterval = 5;
StartingType = "ColdStart";
GparityDirs.resize(3, 1); //1 for G-parity, 0 for periodic
Steps = 5;
TrajectoryLength = 1.0;
}
};
bool fileExists(const std::string &fn){
std::ifstream f(fn);
return f.good();
}
struct LanczosParameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
double, alpha,
double, beta,
double, mu,
int, ord,
int, n_stop,
int, n_want,
int, n_use,
double, tolerance);
LanczosParameters() {
alpha = 35;
beta = 5;
mu = 0;
ord = 100;
n_stop = 10;
n_want = 10;
n_use = 15;
tolerance = 1e-6;
}
};
template<typename FermionActionD, typename FermionFieldD>
void computeEigenvalues(std::string param_file,
GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt, //expect lattice to have been initialized to something
FermionActionD &action, GridParallelRNG &rng){
LanczosParameters params;
if(fileExists(param_file)){
std::cout << GridLogMessage << " Reading " << param_file << std::endl;
Grid::XmlReader rd(param_file);
read(rd, "LanczosParameters", params);
}else if(!GlobalSharedMemory::WorldRank){
std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
Grid::XmlWriter wr(param_file + ".templ");
write(wr, "LanczosParameters", params);
}
FermionFieldD gauss_o(rbGrid);
FermionFieldD gauss(Grid);
gaussian(rng, gauss);
pickCheckerboard(Odd, gauss_o, gauss);
action.ImportGauge(latt);
SchurDiagMooeeOperator<FermionActionD, FermionFieldD> hermop(action);
PlainHermOp<FermionFieldD> hermop_wrap(hermop);
//ChebyshevLanczos<FermionFieldD> Cheb(params.alpha, params.beta, params.mu, params.ord);
assert(params.mu == 0.0);
Chebyshev<FermionFieldD> Cheb(params.beta*params.beta, params.alpha*params.alpha, params.ord+1);
FunctionHermOp<FermionFieldD> Cheb_wrap(Cheb, hermop);
std::cout << "IRL: alpha=" << params.alpha << " beta=" << params.beta << " mu=" << params.mu << " ord=" << params.ord << std::endl;
ImplicitlyRestartedLanczos<FermionFieldD> IRL(Cheb_wrap, hermop_wrap, params.n_stop, params.n_want, params.n_use, params.tolerance, 10000);
std::vector<RealD> eval(params.n_use);
std::vector<FermionFieldD> evec(params.n_use, rbGrid);
int Nconv;
IRL.calc(eval, evec, gauss_o, Nconv);
std::cout << "Eigenvalues:" << std::endl;
for(int i=0;i<params.n_want;i++){
std::cout << i << " " << eval[i] << std::endl;
}
}
//Check the quality of the RHMC approx
//action_or_md toggles checking the action (0), MD (1) or both (2) setups
template<typename FermionActionD, typename FermionFieldD, typename RHMCtype>
void checkRHMC(GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt, //expect lattice to have been initialized to something
FermionActionD &numOp, FermionActionD &denOp, RHMCtype &rhmc, GridParallelRNG &rng,
int inv_pow, const std::string &quark_descr, int action_or_md){
assert(action_or_md == 0 || action_or_md == 1 || action_or_md == 2);
FermionFieldD gauss_o(rbGrid);
FermionFieldD gauss(Grid);
gaussian(rng, gauss);
pickCheckerboard(Odd, gauss_o, gauss);
numOp.ImportGauge(latt);
denOp.ImportGauge(latt);
typedef typename FermionActionD::Impl_t FermionImplPolicyD;
SchurDifferentiableOperator<FermionImplPolicyD> MdagM(numOp);
SchurDifferentiableOperator<FermionImplPolicyD> VdagV(denOp);
PowerMethod<FermionFieldD> power_method;
RealD lambda_max;
std::cout << "Starting: Get RHMC high bound approx for " << quark_descr << " numerator" << std::endl;
lambda_max = power_method(MdagM,gauss_o);
std::cout << GridLogMessage << "Got lambda_max "<<lambda_max<<std::endl;
std::cout << "Starting: Get RHMC high bound approx for " << quark_descr << " denominator" << std::endl;
lambda_max = power_method(VdagV,gauss_o);
std::cout << GridLogMessage << "Got lambda_max "<<lambda_max<<std::endl;
if(action_or_md == 0 || action_or_md == 2){
std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
InversePowerBoundsCheck(inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerAction); //use large tolerance to prevent exit on fail; we are trying to tune here!
std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerAction);
std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
InversePowerBoundsCheck(inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerAction);
std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerAction);
std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
}
std::cout << "-------------------------------------------------------------------------------" << std::endl;
if(action_or_md == 1 || action_or_md == 2){
std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
InversePowerBoundsCheck(inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerMD);
std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerMD);
std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
InversePowerBoundsCheck(inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerMD);
std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerMD);
std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
}
}
template<typename FermionImplPolicy>
void checkEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
GridCartesian* FGrid, GridParallelRNG &rng, const LatticeGaugeFieldD &latt){
std::cout << GridLogMessage << "Starting EOFA action/bounds check" << std::endl;
typename FermionImplPolicy::FermionField eta(FGrid);
RealD scale = std::sqrt(0.5);
gaussian(rng,eta); eta = eta * scale;
//Use the inbuilt check
EOFA.refresh(latt, eta);
EOFA.S(latt);
std::cout << GridLogMessage << "Finished EOFA upper action/bounds check" << std::endl;
}
template<typename FermionImplPolicy>
class EOFAlinop: public LinearOperatorBase<typename FermionImplPolicy::FermionField>{
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA;
LatticeGaugeFieldD &U;
public:
EOFAlinop(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){}
typedef typename FermionImplPolicy::FermionField Field;
void OpDiag (const Field &in, Field &out){ assert(0); }
void OpDir (const Field &in, Field &out,int dir,int disp){ assert(0); }
void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); }
void Op (const Field &in, Field &out){ assert(0); }
void AdjOp (const Field &in, Field &out){ assert(0); }
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
void HermOp(const Field &in, Field &out){ EOFA.Meofa(U, in, out); }
};
template<typename FermionImplPolicy>
void upperBoundEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){
std::cout << GridLogMessage << "Starting EOFA upper bound compute" << std::endl;
EOFAlinop<FermionImplPolicy> linop(EOFA, latt);
typename FermionImplPolicy::FermionField eta(FGrid);
gaussian(rng,eta);
PowerMethod<typename FermionImplPolicy::FermionField> power_method;
auto lambda_max = power_method(linop,eta);
std::cout << GridLogMessage << "Upper bound of EOFA operator " << lambda_max << std::endl;
}
//Applications of M^{-1} cost the same as M for EOFA!
template<typename FermionImplPolicy>
class EOFAinvLinop: public LinearOperatorBase<typename FermionImplPolicy::FermionField>{
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA;
LatticeGaugeFieldD &U;
public:
EOFAinvLinop(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){}
typedef typename FermionImplPolicy::FermionField Field;
void OpDiag (const Field &in, Field &out){ assert(0); }
void OpDir (const Field &in, Field &out,int dir,int disp){ assert(0); }
void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); }
void Op (const Field &in, Field &out){ assert(0); }
void AdjOp (const Field &in, Field &out){ assert(0); }
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
void HermOp(const Field &in, Field &out){ EOFA.MeofaInv(U, in, out); }
};
template<typename FermionImplPolicy>
void lowerBoundEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){
std::cout << GridLogMessage << "Starting EOFA lower bound compute using power method on M^{-1}. Inverse of highest eigenvalue is the lowest eigenvalue of M" << std::endl;
EOFAinvLinop<FermionImplPolicy> linop(EOFA, latt);
typename FermionImplPolicy::FermionField eta(FGrid);
gaussian(rng,eta);
PowerMethod<typename FermionImplPolicy::FermionField> power_method;
auto lambda_max = power_method(linop,eta);
std::cout << GridLogMessage << "Lower bound of EOFA operator " << 1./lambda_max << std::endl;
}
NAMESPACE_BEGIN(Grid);
template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class SchurOperatorF>
class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
public:
typedef typename FermionOperatorD::FermionField FieldD;
typedef typename FermionOperatorF::FermionField FieldF;
using OperatorFunction<FieldD>::operator();
RealD Tolerance;
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
Integer MaxInnerIterations;
Integer MaxOuterIterations;
GridBase* SinglePrecGrid4; //Grid for single-precision fields
GridBase* SinglePrecGrid5; //Grid for single-precision fields
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
FermionOperatorF &FermOpF;
FermionOperatorD &FermOpD;;
SchurOperatorF &LinOpF;
SchurOperatorD &LinOpD;
Integer TotalInnerIterations; //Number of inner CG iterations
Integer TotalOuterIterations; //Number of restarts
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
MixedPrecisionConjugateGradientOperatorFunction(RealD tol,
Integer maxinnerit,
Integer maxouterit,
GridBase* _sp_grid4,
GridBase* _sp_grid5,
FermionOperatorF &_FermOpF,
FermionOperatorD &_FermOpD,
SchurOperatorF &_LinOpF,
SchurOperatorD &_LinOpD):
LinOpF(_LinOpF),
LinOpD(_LinOpD),
FermOpF(_FermOpF),
FermOpD(_FermOpD),
Tolerance(tol),
InnerTolerance(tol),
MaxInnerIterations(maxinnerit),
MaxOuterIterations(maxouterit),
SinglePrecGrid4(_sp_grid4),
SinglePrecGrid5(_sp_grid5),
OuterLoopNormMult(100.)
{
};
void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl;
SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
precisionChange(FermOpF.Umu, FermOpD.Umu);
pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
////////////////////////////////////////////////////////////////////////////////////
// Make a mixed precision conjugate gradient
////////////////////////////////////////////////////////////////////////////////////
MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
MPCG.InnerTolerance = InnerTolerance;
std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
MPCG(src,psi);
}
};
template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class SchurOperatorF>
class MixedPrecisionReliableUpdateConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
public:
typedef typename FermionOperatorD::FermionField FieldD;
typedef typename FermionOperatorF::FermionField FieldF;
using OperatorFunction<FieldD>::operator();
RealD Tolerance;
Integer MaxIterations;
RealD Delta; //reliable update parameter
GridBase* SinglePrecGrid4; //Grid for single-precision fields
GridBase* SinglePrecGrid5; //Grid for single-precision fields
FermionOperatorF &FermOpF;
FermionOperatorD &FermOpD;;
SchurOperatorF &LinOpF;
SchurOperatorD &LinOpD;
MixedPrecisionReliableUpdateConjugateGradientOperatorFunction(RealD tol,
RealD delta,
Integer maxit,
GridBase* _sp_grid4,
GridBase* _sp_grid5,
FermionOperatorF &_FermOpF,
FermionOperatorD &_FermOpD,
SchurOperatorF &_LinOpF,
SchurOperatorD &_LinOpD):
LinOpF(_LinOpF),
LinOpD(_LinOpD),
FermOpF(_FermOpF),
FermOpD(_FermOpD),
Tolerance(tol),
Delta(delta),
MaxIterations(maxit),
SinglePrecGrid4(_sp_grid4),
SinglePrecGrid5(_sp_grid5)
{
};
void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
std::cout << GridLogMessage << " Mixed precision reliable CG update wrapper operator() "<<std::endl;
SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
precisionChange(FermOpF.Umu, FermOpD.Umu);
pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
////////////////////////////////////////////////////////////////////////////////////
// Make a mixed precision conjugate gradient
////////////////////////////////////////////////////////////////////////////////////
ConjugateGradientReliableUpdate<FieldD,FieldF> MPCG(Tolerance,MaxIterations,Delta,SinglePrecGrid5,LinOpF,LinOpD);
std::cout << GridLogMessage << "Calling mixed precision reliable update Conjugate Gradient" <<std::endl;
MPCG(src,psi);
}
};
NAMESPACE_END(Grid);
int main(int argc, char **argv) {
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
// here make a routine to print all the relevant information on the run
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
std::string param_file = "params.xml";
bool file_load_check = false;
for(int i=1;i<argc;i++){
std::string sarg(argv[i]);
if(sarg == "--param_file"){
assert(i!=argc-1);
param_file = argv[i+1];
}else if(sarg == "--read_check"){ //check the fields load correctly and pass checksum/plaquette repro
file_load_check = true;
}
}
//Read the user parameters
EvolParameters user_params;
if(fileExists(param_file)){
std::cout << GridLogMessage << " Reading " << param_file << std::endl;
Grid::XmlReader rd(param_file);
read(rd, "Params", user_params);
}else if(!GlobalSharedMemory::WorldRank){
std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
{
Grid::XmlWriter wr(param_file + ".templ");
write(wr, "Params", user_params);
}
std::cout << GridLogMessage << " Done" << std::endl;
Grid_finalize();
return 0;
}
//Check the parameters
if(user_params.GparityDirs.size() != Nd-1){
std::cerr << "Error in input parameters: expect GparityDirs to have size = " << Nd-1 << std::endl;
exit(1);
}
for(int i=0;i<Nd-1;i++)
if(user_params.GparityDirs[i] != 0 && user_params.GparityDirs[i] != 1){
std::cerr << "Error in input parameters: expect GparityDirs values to be 0 (periodic) or 1 (G-parity)" << std::endl;
exit(1);
}
typedef GparityMobiusEOFAFermionD EOFAactionD;
typedef GparityMobiusFermionD FermionActionD;
typedef typename FermionActionD::Impl_t FermionImplPolicyD;
typedef typename FermionActionD::FermionField FermionFieldD;
typedef GparityMobiusEOFAFermionF EOFAactionF;
typedef GparityMobiusFermionF FermionActionF;
typedef typename FermionActionF::Impl_t FermionImplPolicyF;
typedef typename FermionActionF::FermionField FermionFieldF;
typedef GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicyD,FermionImplPolicyF> MixedPrecRHMC;
typedef GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicyD> DoublePrecRHMC;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
IntegratorParameters MD;
typedef ConjugateHMCRunnerD<MinimumNorm2> HMCWrapper; //NB: This is the "Omelyan integrator"
typedef HMCWrapper::ImplPolicy GaugeImplPolicy;
MD.name = std::string("MinimumNorm2");
MD.MDsteps = user_params.Steps;
MD.trajL = user_params.TrajectoryLength;
HMCparameters HMCparams;
HMCparams.StartTrajectory = user_params.StartTrajectory;
HMCparams.Trajectories = user_params.Trajectories;
HMCparams.NoMetropolisUntil= 0;
HMCparams.StartingType = user_params.StartingType;
HMCparams.MetropolisTest = user_params.MetropolisTest;
HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams);
// Grid from the command line arguments --grid and --mpi
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_lat";
CPparams.rng_prefix = "ckpoint_rng";
CPparams.saveInterval = user_params.SaveInterval;
CPparams.format = "IEEE64BIG";
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
//Note that checkpointing saves the RNG state so that this initialization is required only for the very first configuration
RNGModuleParameters RNGpar;
RNGpar.serial_seeds = "1 2 3 4 5";
RNGpar.parallel_seeds = "6 7 8 9 10";
TheHMC.Resources.SetRNGSeeds(RNGpar);
typedef PlaquetteMod<GaugeImplPolicy> PlaqObs;
TheHMC.Resources.AddObservable<PlaqObs>();
//////////////////////////////////////////////
//aiming for ainv=2.068 me Bob
//Estimated a(ml+mres) [48ID] = 0.001048 0.00104
// a(mh+mres) [48ID] = 0.028847 0.02805
//Estimate Ls=12, b+c=2 mres~0.0003
const int Ls = 12;
Real beta = 1.946;
Real light_mass = 0.00074; //0.00104 - mres_approx;
Real strange_mass = 0.02775; //0.02805 - mres_approx
Real pv_mass = 1.0;
RealD M5 = 1.8;
RealD mobius_scale = 2.; //b+c
RealD mob_bmc = 1.0;
RealD mob_b = (mobius_scale + mob_bmc)/2.;
RealD mob_c = (mobius_scale - mob_bmc)/2.;
//Setup the Grids
auto UGridD = TheHMC.Resources.GetCartesian();
auto UrbGridD = TheHMC.Resources.GetRBCartesian();
auto FGridD = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD);
auto FrbGridD = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD);
GridCartesian* UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian* UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF);
auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF);
ConjugateIwasakiGaugeActionD GaugeAction(beta);
// temporarily need a gauge field
LatticeGaugeFieldD Ud(UGridD);
LatticeGaugeFieldF Uf(UGridF);
//Setup the BCs
FermionActionD::ImplParams Params;
for(int i=0;i<Nd-1;i++) Params.twists[i] = user_params.GparityDirs[i]; //G-parity directions
Params.twists[Nd-1] = 1; //APBC in time direction
std::vector<int> dirs4(Nd);
for(int i=0;i<Nd-1;i++) dirs4[i] = user_params.GparityDirs[i];
dirs4[Nd-1] = 0; //periodic gauge BC in time
GaugeImplPolicy::setDirections(dirs4); //gauge BC
//Run optional gauge field checksum checker and exit
if(file_load_check){
TheHMC.initializeGaugeFieldAndRNGs(Ud);
std::cout << GridLogMessage << " Done" << std::endl;
Grid_finalize();
return 0;
}
////////////////////////////////////
// Collect actions
////////////////////////////////////
ActionLevel<HMCWrapper::Field> Level1(1); //light quark + strange quark
ActionLevel<HMCWrapper::Field> Level2(4); //DSDR
ActionLevel<HMCWrapper::Field> Level3(2); //gauge
/////////////////////////////////////////////////////////////
// Light EOFA action
// have to be careful with the parameters, cf. Test_dwf_gpforce_eofa.cc
/////////////////////////////////////////////////////////////
typedef SchurDiagMooeeOperator<EOFAactionD,FermionFieldD> EOFAschuropD;
typedef SchurDiagMooeeOperator<EOFAactionF,FermionFieldF> EOFAschuropF;
typedef ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction<FermionImplPolicyD, FermionImplPolicyF> EOFAmixPrecPFaction;
typedef MixedPrecisionConjugateGradientOperatorFunction<EOFAactionD, EOFAactionF, EOFAschuropD, EOFAschuropF> EOFA_mxCG;
typedef MixedPrecisionReliableUpdateConjugateGradientOperatorFunction<EOFAactionD, EOFAactionF, EOFAschuropD, EOFAschuropF> EOFA_relupCG;
std::vector<RealD> eofa_light_masses = { light_mass , 0.004, 0.016, 0.064, 0.256 };
std::vector<RealD> eofa_pv_masses = { 0.004 , 0.016, 0.064, 0.256, 1.0 };
int n_light_hsb = 5;
assert(user_params.eofa_l.size() == n_light_hsb);
EOFAmixPrecPFaction* EOFA_pfactions[n_light_hsb];
for(int i=0;i<n_light_hsb;i++){
RealD iml = eofa_light_masses[i];
RealD ipv = eofa_pv_masses[i];
EOFAactionD* LopD = new EOFAactionD(Ud, *FGridD, *FrbGridD, *UGridD, *UrbGridD, iml, iml, ipv, 0.0, -1, M5, mob_b, mob_c, Params);
EOFAactionF* LopF = new EOFAactionF(Uf, *FGridF, *FrbGridF, *UGridF, *UrbGridF, iml, iml, ipv, 0.0, -1, M5, mob_b, mob_c, Params);
EOFAactionD* RopD = new EOFAactionD(Ud, *FGridD, *FrbGridD, *UGridD, *UrbGridD, ipv, iml, ipv, -1.0, 1, M5, mob_b, mob_c, Params);
EOFAactionF* RopF = new EOFAactionF(Uf, *FGridF, *FrbGridF, *UGridF, *UrbGridF, ipv, iml, ipv, -1.0, 1, M5, mob_b, mob_c, Params);
EOFAschuropD* linopL_D = new EOFAschuropD(*LopD);
EOFAschuropD* linopR_D = new EOFAschuropD(*RopD);
EOFAschuropF* linopL_F = new EOFAschuropF(*LopF);
EOFAschuropF* linopR_F = new EOFAschuropF(*RopF);
#if 1
//Note reusing user_params.eofa_l.action(|md)_mixcg_inner_tolerance as Delta for now
EOFA_relupCG* ActionMCG_L = new EOFA_relupCG(user_params.eofa_l[i].action_tolerance, user_params.eofa_l[i].action_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
EOFA_relupCG* ActionMCG_R = new EOFA_relupCG(user_params.eofa_l[i].action_tolerance, user_params.eofa_l[i].action_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
EOFA_relupCG* DerivMCG_L = new EOFA_relupCG(user_params.eofa_l[i].md_tolerance, user_params.eofa_l[i].md_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
EOFA_relupCG* DerivMCG_R = new EOFA_relupCG(user_params.eofa_l[i].md_tolerance, user_params.eofa_l[i].md_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
#else
EOFA_mxCG* ActionMCG_L = new EOFA_mxCG(user_params.eofa_l[i].action_tolerance, 10000, 1000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
ActionMCG_L->InnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance;
EOFA_mxCG* ActionMCG_R = new EOFA_mxCG(user_params.eofa_l[i].action_tolerance, 10000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
ActionMCG_R->InnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance;
EOFA_mxCG* DerivMCG_L = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 10000, 1000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
DerivMCG_L->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance;
EOFA_mxCG* DerivMCG_R = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 10000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
DerivMCG_R->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance;
std::cout << GridLogMessage << "Set EOFA action solver action tolerance outer=" << ActionMCG_L->Tolerance << " inner=" << ActionMCG_L->InnerTolerance << std::endl;
std::cout << GridLogMessage << "Set EOFA MD solver tolerance outer=" << DerivMCG_L->Tolerance << " inner=" << DerivMCG_L->InnerTolerance << std::endl;
#endif
EOFAmixPrecPFaction* EOFA = new EOFAmixPrecPFaction(*LopF, *RopF,
*LopD, *RopD,
*ActionMCG_L, *ActionMCG_R,
*ActionMCG_L, *ActionMCG_R,
*DerivMCG_L, *DerivMCG_R,
user_params.eofa_l[i].rat_params, true);
EOFA_pfactions[i] = EOFA;
Level1.push_back(EOFA);
}
////////////////////////////////////
// Strange action
////////////////////////////////////
FermionActionD Numerator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD,strange_mass,M5,mob_b,mob_c,Params);
FermionActionD Denominator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD, pv_mass,M5,mob_b,mob_c,Params);
FermionActionF Numerator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF,strange_mass,M5,mob_b,mob_c,Params);
FermionActionF Denominator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF, pv_mass,M5,mob_b,mob_c,Params);
RationalActionParams rat_act_params_s;
rat_act_params_s.inv_pow = 4; // (M^dag M)^{1/4}
rat_act_params_s.precision= 60;
rat_act_params_s.MaxIter = 10000;
user_params.rat_quo_s.Export(rat_act_params_s);
std::cout << GridLogMessage << " Heavy quark bounds check every " << rat_act_params_s.BoundsCheckFreq << " trajectories (avg)" << std::endl;
//MixedPrecRHMC Quotient_s(Denominator_sD, Numerator_sD, Denominator_sF, Numerator_sF, rat_act_params_s, user_params.rat_quo_s.reliable_update_freq);
DoublePrecRHMC Quotient_s(Denominator_sD, Numerator_sD, rat_act_params_s);
Level1.push_back(&Quotient_s);
///////////////////////////////////
// DSDR action
///////////////////////////////////
RealD dsdr_mass=-1.8;
//Use same DSDR twists as https://arxiv.org/pdf/1208.4412.pdf
RealD dsdr_epsilon_f = 0.02; //numerator (in determinant)
RealD dsdr_epsilon_b = 0.5;
GparityWilsonTMFermionD Numerator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_f, Params);
GparityWilsonTMFermionF Numerator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_f, Params);
GparityWilsonTMFermionD Denominator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_b, Params);
GparityWilsonTMFermionF Denominator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_b, Params);
RationalActionParams rat_act_params_DSDR;
rat_act_params_DSDR.inv_pow = 2; // (M^dag M)^{1/2}
rat_act_params_DSDR.precision= 60;
rat_act_params_DSDR.MaxIter = 10000;
user_params.rat_quo_DSDR.Export(rat_act_params_DSDR);
std::cout << GridLogMessage << "DSDR quark bounds check every " << rat_act_params_DSDR.BoundsCheckFreq << " trajectories (avg)" << std::endl;
DoublePrecRHMC Quotient_DSDR(Denominator_DSDR_D, Numerator_DSDR_D, rat_act_params_DSDR);
Level2.push_back(&Quotient_DSDR);
/////////////////////////////////////////////////////////////
// Gauge action
/////////////////////////////////////////////////////////////
Level3.push_back(&GaugeAction);
TheHMC.TheAction.push_back(Level1);
TheHMC.TheAction.push_back(Level2);
TheHMC.TheAction.push_back(Level3);
std::cout << GridLogMessage << " Action complete "<< std::endl;
//Action tuning
bool
tune_rhmc_s=false, eigenrange_s=false,
tune_rhmc_DSDR=false, eigenrange_DSDR=false,
check_eofa=false,
upper_bound_eofa=false, lower_bound_eofa(false);
std::string lanc_params_s;
std::string lanc_params_DSDR;
int tune_rhmc_s_action_or_md;
int tune_rhmc_DSDR_action_or_md;
int eofa_which_hsb;
for(int i=1;i<argc;i++){
std::string sarg(argv[i]);
if(sarg == "--tune_rhmc_s"){
assert(i < argc-1);
tune_rhmc_s=true;
tune_rhmc_s_action_or_md = std::stoi(argv[i+1]);
}
else if(sarg == "--eigenrange_s"){
assert(i < argc-1);
eigenrange_s=true;
lanc_params_s = argv[i+1];
}
else if(sarg == "--tune_rhmc_DSDR"){
assert(i < argc-1);
tune_rhmc_DSDR=true;
tune_rhmc_DSDR_action_or_md = std::stoi(argv[i+1]);
}
else if(sarg == "--eigenrange_DSDR"){
assert(i < argc-1);
eigenrange_DSDR=true;
lanc_params_DSDR = argv[i+1];
}
else if(sarg == "--check_eofa"){
assert(i < argc-1);
check_eofa = true;
eofa_which_hsb = std::stoi(argv[i+1]); //-1 indicates all hasenbusch
assert(eofa_which_hsb == -1 || (eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb) );
}
else if(sarg == "--upper_bound_eofa"){
assert(i < argc-1);
upper_bound_eofa = true;
eofa_which_hsb = std::stoi(argv[i+1]);
assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb);
}
else if(sarg == "--lower_bound_eofa"){
assert(i < argc-1);
lower_bound_eofa = true;
eofa_which_hsb = std::stoi(argv[i+1]);
assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb);
}
}
if(tune_rhmc_s || eigenrange_s || tune_rhmc_DSDR || eigenrange_DSDR ||check_eofa || upper_bound_eofa || lower_bound_eofa) {
std::cout << GridLogMessage << "Running checks" << std::endl;
TheHMC.initializeGaugeFieldAndRNGs(Ud);
//std::cout << GridLogMessage << "EOFA action solver action tolerance outer=" << ActionMCG_L.Tolerance << " inner=" << ActionMCG_L.InnerTolerance << std::endl;
//std::cout << GridLogMessage << "EOFA MD solver tolerance outer=" << DerivMCG_L.Tolerance << " inner=" << DerivMCG_L.InnerTolerance << std::endl;
if(check_eofa){
if(eofa_which_hsb >= 0){
std::cout << GridLogMessage << "Starting checking EOFA Hasenbusch " << eofa_which_hsb << std::endl;
checkEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
std::cout << GridLogMessage << "Finished checking EOFA Hasenbusch " << eofa_which_hsb << std::endl;
}else{
for(int i=0;i<n_light_hsb;i++){
std::cout << GridLogMessage << "Starting checking EOFA Hasenbusch " << i << std::endl;
checkEOFA(*EOFA_pfactions[i], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
std::cout << GridLogMessage << "Finished checking EOFA Hasenbusch " << i << std::endl;
}
}
}
if(upper_bound_eofa) upperBoundEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
if(lower_bound_eofa) lowerBoundEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
if(eigenrange_s) computeEigenvalues<FermionActionD, FermionFieldD>(lanc_params_s, FGridD, FrbGridD, Ud, Numerator_sD, TheHMC.Resources.GetParallelRNG());
if(tune_rhmc_s) checkRHMC<FermionActionD, FermionFieldD, decltype(Quotient_s)>(FGridD, FrbGridD, Ud, Numerator_sD, Denominator_sD, Quotient_s, TheHMC.Resources.GetParallelRNG(), 4, "strange", tune_rhmc_s_action_or_md);
if(eigenrange_DSDR) computeEigenvalues<GparityWilsonTMFermionD, GparityWilsonTMFermionD::FermionField>(lanc_params_DSDR, UGridD, UrbGridD, Ud, Numerator_DSDR_D, TheHMC.Resources.GetParallelRNG());
if(tune_rhmc_DSDR) checkRHMC<GparityWilsonTMFermionD, GparityWilsonTMFermionD::FermionField, decltype(Quotient_DSDR)>(UGridD, UrbGridD, Ud, Numerator_DSDR_D, Denominator_DSDR_D, Quotient_DSDR, TheHMC.Resources.GetParallelRNG(), 2, "DSDR", tune_rhmc_DSDR_action_or_md);
std::cout << GridLogMessage << " Done" << std::endl;
Grid_finalize();
return 0;
}
//Run the HMC
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
TheHMC.Run();
std::cout << GridLogMessage << " Done" << std::endl;
Grid_finalize();
return 0;
} // main

View File

@ -1,267 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_hmc_EODWFRatio.cc
Copyright (C) 2015-2016
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
int main(int argc, char **argv) {
using namespace Grid;
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
// Typedefs to simplify notation
typedef WilsonImplR FermionImplPolicy;
typedef MobiusFermionR FermionAction;
typedef typename FermionAction::FermionField FermionField;
typedef Grid::XmlReader Serialiser;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
IntegratorParameters MD;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// MD.name = std::string("Leap Frog");
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
// MD.name = std::string("Force Gradient");
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
MD.name = std::string("MinimumNorm2");
MD.MDsteps = 4;
MD.trajL = 1.0;
HMCparameters HMCparams;
HMCparams.StartTrajectory = 17;
HMCparams.Trajectories = 200;
HMCparams.NoMetropolisUntil= 0;
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
// HMCparams.StartingType =std::string("ColdStart");
HMCparams.StartingType =std::string("CheckpointStart");
HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams);
// Grid from the command line arguments --grid and --mpi
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_DDHMC_lat";
CPparams.rng_prefix = "ckpoint_DDHMC_rng";
CPparams.saveInterval = 1;
CPparams.format = "IEEE64BIG";
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
RNGModuleParameters RNGpar;
RNGpar.serial_seeds = "1 2 3 4 5";
RNGpar.parallel_seeds = "6 7 8 9 10";
TheHMC.Resources.SetRNGSeeds(RNGpar);
// Construct observables
// here there is too much indirection
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
TheHMC.Resources.AddObservable<PlaqObs>();
//////////////////////////////////////////////
const int Ls = 16;
RealD M5 = 1.8;
RealD b = 1.0;
RealD c = 0.0;
Real beta = 2.13;
Real light_mass = 0.01;
Real strange_mass = 0.04;
Real pv_mass = 1.0;
std::vector<Real> hasenbusch({ light_mass, 0.04, 0.25, 0.4, 0.7 , pv_mass });
// FIXME:
// Same in MC and MD
// Need to mix precision too
OneFlavourRationalParams SFRp;
SFRp.lo = 4.0e-3;
SFRp.hi = 30.0;
SFRp.MaxIter = 10000;
SFRp.tolerance= 1.0e-8;
SFRp.mdtolerance= 1.0e-5;
SFRp.degree = 16;
SFRp.precision= 50;
SFRp.BoundsCheckFreq=5;
OneFlavourRationalParams OFRp;
OFRp.lo = 1.0e-4;
OFRp.hi = 30.0;
OFRp.MaxIter = 10000;
OFRp.tolerance= 1.0e-8;
OFRp.mdtolerance= 1.0e-5;
OFRp.degree = 16;
OFRp.precision= 50;
OFRp.BoundsCheckFreq=5;
auto GridPtr = TheHMC.Resources.GetCartesian();
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
////////////////////////////////////////////////////////////////
// Domain decomposed
////////////////////////////////////////////////////////////////
Coordinate latt4 = GridPtr->GlobalDimensions();
Coordinate mpi = GridPtr->ProcessorGrid();
Coordinate shm;
GlobalSharedMemory::GetShmDims(mpi,shm);
Coordinate CommDim(Nd);
for(int d=0;d<Nd;d++) CommDim[d]= (mpi[d]/shm[d])>1 ? 1 : 0;
Coordinate Dirichlet(Nd+1,0);
Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0];
Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1];
Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2];
Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3];
Coordinate Block4(Nd);
Block4[0] = Dirichlet[1];
Block4[1] = Dirichlet[2];
Block4[2] = Dirichlet[3];
Block4[3] = Dirichlet[4];
int Width=3;
TheHMC.Resources.SetMomentumFilter(new DDHMCFilter<WilsonImplR::Field>(Block4,Width));
//////////////////////////
// Fermion Grid
//////////////////////////
auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
IwasakiGaugeActionR GaugeAction(beta);
// temporarily need a gauge field
LatticeGaugeField U(GridPtr);
// These lines are unecessary if BC are all periodic
std::vector<Complex> boundary = {1,1,1,-1};
FermionAction::ImplParams Params(boundary);
double StoppingCondition = 1e-8;
double MDStoppingCondition = 1e-6;
double MaxCGIterations = 30000;
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);
////////////////////////////////////
// Collect actions
////////////////////////////////////
ActionLevel<HMCWrapper::Field> Level1(1);
ActionLevel<HMCWrapper::Field> Level2(4);
ActionLevel<HMCWrapper::Field> Level3(8);
////////////////////////////////////
// Strange action
////////////////////////////////////
FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params);
FermionAction StrangeOpDir (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
FermionAction StrangePauliVillarsOpDir(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params);
StrangeOpDir.DirichletBlock(Dirichlet);
StrangePauliVillarsOpDir.DirichletBlock(Dirichlet);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionBdy(StrangeOpDir,StrangeOp,SFRp);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionLocal(StrangePauliVillarsOpDir,StrangeOpDir,SFRp);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionPVBdy(StrangePauliVillarsOp,StrangePauliVillarsOpDir,SFRp);
Level1.push_back(&StrangePseudoFermionBdy);
Level2.push_back(&StrangePseudoFermionLocal);
Level1.push_back(&StrangePseudoFermionPVBdy);
////////////////////////////////////
// up down action
////////////////////////////////////
std::vector<Real> light_den;
std::vector<Real> light_num;
std::vector<int> dirichlet_den;
std::vector<int> dirichlet_num;
int n_hasenbusch = hasenbusch.size();
light_den.push_back(light_mass); dirichlet_den.push_back(0);
for(int h=0;h<n_hasenbusch;h++){
light_den.push_back(hasenbusch[h]); dirichlet_den.push_back(1);
}
for(int h=0;h<n_hasenbusch;h++){
light_num.push_back(hasenbusch[h]); dirichlet_num.push_back(1);
}
light_num.push_back(pv_mass); dirichlet_num.push_back(0);
std::vector<FermionAction *> Numerators;
std::vector<FermionAction *> Denominators;
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
std::vector<OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
for(int h=0;h<n_hasenbusch+1;h++){
std::cout << GridLogMessage
<< " 2f quotient Action ";
std::cout << "det D("<<light_den[h]<<")";
if ( dirichlet_den[h] ) std::cout << "^dirichlet ";
std::cout << "/ det D("<<light_num[h]<<")";
if ( dirichlet_num[h] ) std::cout << "^dirichlet ";
std::cout << std::endl;
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
if(h!=0) {
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],MDCG,CG));
} else {
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
}
if ( dirichlet_den[h]==1) Denominators[h]->DirichletBlock(Dirichlet);
if ( dirichlet_num[h]==1) Numerators[h]->DirichletBlock(Dirichlet);
}
int nquo=Quotients.size();
Level1.push_back(Bdys[0]);
Level1.push_back(Bdys[1]);
for(int h=0;h<nquo-1;h++){
Level2.push_back(Quotients[h]);
}
Level2.push_back(Quotients[nquo-1]);
/////////////////////////////////////////////////////////////
// Gauge action
/////////////////////////////////////////////////////////////
Level3.push_back(&GaugeAction);
TheHMC.TheAction.push_back(Level1);
TheHMC.TheAction.push_back(Level2);
TheHMC.TheAction.push_back(Level3);
std::cout << GridLogMessage << " Action complete "<< std::endl;
/////////////////////////////////////////////////////////////
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
TheHMC.Run(); // no smearing
Grid_finalize();
} // main

View File

@ -1,419 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_hmc_EODWFRatio.cc
Copyright (C) 2015-2016
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
int main(int argc, char **argv) {
using namespace Grid;
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
// Typedefs to simplify notation
typedef WilsonImplR FermionImplPolicy;
typedef MobiusFermionR FermionAction;
typedef typename FermionAction::FermionField FermionField;
typedef Grid::XmlReader Serialiser;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
IntegratorParameters MD;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// MD.name = std::string("Leap Frog");
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
// MD.name = std::string("Force Gradient");
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
MD.name = std::string("MinimumNorm2");
MD.MDsteps = 6;
MD.trajL = 1.0;
HMCparameters HMCparams;
HMCparams.StartTrajectory = 1077;
HMCparams.Trajectories = 1;
HMCparams.NoMetropolisUntil= 0;
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
// HMCparams.StartingType =std::string("ColdStart");
HMCparams.StartingType =std::string("CheckpointStart");
HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams);
// Grid from the command line arguments --grid and --mpi
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_DDHMC_lat";
CPparams.rng_prefix = "ckpoint_DDHMC_rng";
CPparams.saveInterval = 1;
CPparams.format = "IEEE64BIG";
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
RNGModuleParameters RNGpar;
RNGpar.serial_seeds = "1 2 3 4 5";
RNGpar.parallel_seeds = "6 7 8 9 10";
TheHMC.Resources.SetRNGSeeds(RNGpar);
// Construct observables
// here there is too much indirection
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
TheHMC.Resources.AddObservable<PlaqObs>();
//////////////////////////////////////////////
const int Ls = 12;
RealD M5 = 1.8;
RealD b = 1.5;
RealD c = 0.5;
// Real beta = 2.31;
// Real light_mass = 5.4e-4;
Real beta = 2.13;
Real light_mass = 7.8e-4;
Real strange_mass = 0.02132;
Real pv_mass = 1.0;
// std::vector<Real> hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
// FIXME:
// Same in MC and MD
// Need to mix precision too
OneFlavourRationalParams SFRp; // Strange
SFRp.lo = 4.0e-3;
SFRp.hi = 90.0;
SFRp.MaxIter = 60000;
SFRp.tolerance= 1.0e-8;
SFRp.mdtolerance= 1.0e-4;
SFRp.degree = 12;
SFRp.precision= 50;
SFRp.BoundsCheckFreq=0;
OneFlavourRationalParams OFRp; // Up/down
OFRp.lo = 2.0e-5;
OFRp.hi = 90.0;
OFRp.MaxIter = 60000;
OFRp.tolerance= 1.0e-7;
OFRp.mdtolerance= 1.0e-4;
// OFRp.degree = 20; converges
// OFRp.degree = 16;
OFRp.degree = 12;
OFRp.precision= 80;
OFRp.BoundsCheckFreq=0;
auto GridPtr = TheHMC.Resources.GetCartesian();
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
////////////////////////////////////////////////////////////////
// Domain decomposed
////////////////////////////////////////////////////////////////
Coordinate latt4 = GridPtr->GlobalDimensions();
Coordinate mpi = GridPtr->ProcessorGrid();
Coordinate shm;
GlobalSharedMemory::GetShmDims(mpi,shm);
Coordinate CommDim(Nd);
for(int d=0;d<Nd;d++) CommDim[d]= (mpi[d]/shm[d])>1 ? 1 : 0;
Coordinate NonDirichlet(Nd+1,0);
Coordinate Dirichlet(Nd+1,0);
Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0];
Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1];
Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2];
Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3];
Coordinate Block4(Nd);
// Block4[0] = Dirichlet[1];
// Block4[1] = Dirichlet[2];
// Block4[2] = Dirichlet[3];
Block4[0] = 0;
Block4[1] = 0;
Block4[2] = 0;
Block4[3] = Dirichlet[4];
int Width=3;
TheHMC.Resources.SetMomentumFilter(new DDHMCFilter<WilsonImplR::Field>(Block4,Width));
//////////////////////////
// Fermion Grid
//////////////////////////
auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
IwasakiGaugeActionR GaugeAction(beta);
// temporarily need a gauge field
LatticeGaugeField U(GridPtr);
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
TheHMC.initializeGaugeFieldAndRNGs(U);
// These lines are unecessary if BC are all periodic
std::vector<Complex> boundary = {1,1,1,-1};
FermionAction::ImplParams Params(boundary);
Params.dirichlet=NonDirichlet;
FermionAction::ImplParams ParamsDir(boundary);
ParamsDir.dirichlet=Dirichlet;
// double StoppingCondition = 1e-14;
// double MDStoppingCondition = 1e-9;
double StoppingCondition = 1e-8;
double MDStoppingCondition = 1e-6;
double MaxCGIterations = 300000;
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);
////////////////////////////////////
// Collect actions
////////////////////////////////////
ActionLevel<HMCWrapper::Field> Level1(1);
ActionLevel<HMCWrapper::Field> Level2(4);
ActionLevel<HMCWrapper::Field> Level3(8);
////////////////////////////////////
// Strange action
////////////////////////////////////
FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params);
FermionAction StrangeOpDir (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, ParamsDir);
FermionAction StrangePauliVillarsOpDir(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, ParamsDir);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionBdy(StrangeOpDir,StrangeOp,SFRp);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionLocal(StrangePauliVillarsOpDir,StrangeOpDir,SFRp);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionPVBdy(StrangePauliVillarsOp,StrangePauliVillarsOpDir,SFRp);
Level1.push_back(&StrangePseudoFermionBdy);
Level2.push_back(&StrangePseudoFermionLocal);
Level1.push_back(&StrangePseudoFermionPVBdy);
////////////////////////////////////
// up down action
////////////////////////////////////
std::vector<Real> light_den;
std::vector<Real> light_num;
std::vector<int> dirichlet_den;
std::vector<int> dirichlet_num;
int n_hasenbusch = hasenbusch.size();
light_den.push_back(light_mass); dirichlet_den.push_back(0);
for(int h=0;h<n_hasenbusch;h++){
light_den.push_back(hasenbusch[h]); dirichlet_den.push_back(1);
}
for(int h=0;h<n_hasenbusch;h++){
light_num.push_back(hasenbusch[h]); dirichlet_num.push_back(1);
}
light_num.push_back(pv_mass); dirichlet_num.push_back(0);
std::vector<FermionAction *> Numerators;
std::vector<FermionAction *> Denominators;
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
std::vector<OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
for(int h=0;h<n_hasenbusch+1;h++){
std::cout << GridLogMessage
<< " 2f quotient Action ";
std::cout << "det D("<<light_den[h]<<")";
if ( dirichlet_den[h] ) std::cout << "^dirichlet ";
std::cout << "/ det D("<<light_num[h]<<")";
if ( dirichlet_num[h] ) std::cout << "^dirichlet ";
std::cout << std::endl;
FermionAction::ImplParams ParamsNum(boundary);
FermionAction::ImplParams ParamsDen(boundary);
if ( dirichlet_num[h]==1) ParamsNum.dirichlet = Dirichlet;
else ParamsNum.dirichlet = NonDirichlet;
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, ParamsNum));
if ( dirichlet_den[h]==1) ParamsDen.dirichlet = Dirichlet;
else ParamsDen.dirichlet = NonDirichlet;
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, ParamsDen));
if(h!=0) {
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],MDCG,CG));
} else {
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
}
}
int nquo=Quotients.size();
Level1.push_back(Bdys[0]);
Level1.push_back(Bdys[1]);
for(int h=0;h<nquo-1;h++){
Level2.push_back(Quotients[h]);
}
Level2.push_back(Quotients[nquo-1]);
/////////////////////////////////////////////////////////////
// Gauge action
/////////////////////////////////////////////////////////////
Level3.push_back(&GaugeAction);
TheHMC.TheAction.push_back(Level1);
TheHMC.TheAction.push_back(Level2);
TheHMC.TheAction.push_back(Level3);
std::cout << GridLogMessage << " Action complete "<< std::endl;
/////////////////////////////////////////////////////////////
if(1){
// TODO:
// i) Break high bound, how rapidly does it break? Tune this test.
// ii) Break low bound, how rapidly?
// iii) Run lanczos
// iv) Have CG return spectral range estimate
FermionField vec(StrangeOp.FermionRedBlackGrid());
FermionField res(StrangeOp.FermionRedBlackGrid());
vec = 1; // Fill with any old junk
std::cout << "Bounds check on strange operator mass "<< StrangeOp.Mass()<<std::endl;
SchurDifferentiableOperator<FermionImplPolicy> SdagS(StrangeOp);
HighBoundCheck(SdagS,vec,SFRp.hi);
ChebyBoundsCheck(SdagS,vec,SFRp.lo,SFRp.hi);
std::cout << "Strange inversion"<<std::endl;
res=Zero();
// MDCG(SdagS,vec,res);
std::cout << "Bounds check on light quark operator mass "<< Denominators[0]->Mass() <<std::endl;
SchurDifferentiableOperator<FermionImplPolicy> UdagU(*Denominators[0]);
HighBoundCheck(UdagU,vec,OFRp.hi);
ChebyBoundsCheck(UdagU,vec,OFRp.lo,OFRp.hi);
std::cout << "light inversion"<<std::endl;
res=Zero();
// MDCG(UdagU,vec,res);
std::cout << "Bounds check on strange dirichlet operator mass "<< StrangeOpDir.Mass()<<std::endl;
SchurDifferentiableOperator<FermionImplPolicy> SddagSd(StrangeOpDir);
HighBoundCheck(SddagSd,vec,OFRp.hi);
ChebyBoundsCheck(SddagSd,vec,OFRp.lo,OFRp.hi);
std::cout << "strange dirichlet inversion"<<std::endl;
res=Zero();
// MDCG(SddagSd,vec,res);
std::cout << "Bounds check on light dirichlet operator mass "<< Numerators[0]->Mass()<<std::endl;
SchurDifferentiableOperator<FermionImplPolicy> UddagUd(*Numerators[0]);
HighBoundCheck(UddagUd,vec,OFRp.hi);
ChebyBoundsCheck(UddagUd,vec,OFRp.lo,OFRp.hi);
std::cout << "light dirichlet inversion"<<std::endl;
res=Zero();
//MDCG(UddagUd,vec,res);
auto grid4= GridPtr;
auto rbgrid4= GridRBPtr;
auto rbgrid = StrangeOp.FermionRedBlackGrid();
auto grid = StrangeOp.FermionGrid();
if(1){
const int Nstop = 5;
const int Nk = 20;
const int Np = 20;
const int Nm = Nk+Np;
const int MaxIt= 10000;
int Nconv;
RealD resid = 1.0e-5;
if(0)
{
int order = 501;
RealD bound = 5.0e-4;
std::cout << GridLogMessage << " Lanczos for dirichlet bound " << bound<<" order "<< order<<std::endl;
Chebyshev<FermionField> Cheby(bound,90.,order);
FunctionHermOp<FermionField> OpCheby(Cheby,UddagUd);
PlainHermOp<FermionField> Op (UddagUd);
ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt);
std::vector<RealD> eval(Nm);
std::vector<FermionField> evec(Nm,rbgrid);
FermionField src(rbgrid);src = 1.0;
IRL.calc(eval,evec,src,Nconv);
FermionField tmp(rbgrid);
FermionField ftmp(grid);
FermionField ftmp4(grid4);
for(int ev=0;ev<evec.size();ev++){
Gamma GT(Gamma::Algebra::GammaT);
std::cout << " evec " << ev << std::endl;
tmp = evec[ev] + GT*evec[ev];
DumpSliceNorm(" 1+gammaT ",tmp,Nd);
tmp = evec[ev] - GT*evec[ev];
DumpSliceNorm(" 1-gammaT ",tmp,Nd);
}
for(int e=0;e<10;e++){
std::cout << " Dirichlet evec "<<e<<std::endl;
tmp = evec[e];
for(int s=0;s<Ls;s++){
ftmp=Zero();
setCheckerboard(ftmp,tmp);
ExtractSlice(ftmp4,ftmp,s,0);
std::cout << "s-slice "<<s<< " evec[0] " << std::endl;
DumpSliceNorm(" s-slice ",ftmp4,Nd-1);
}
}
}
if(1)
{
int order = 2001;
RealD bound = 6.0e-5;
std::cout << GridLogMessage << " Lanczos for full operator bound " << bound<<" order "<< order<<std::endl;
Chebyshev<FermionField> Cheby(bound,90.,order);
FunctionHermOp<FermionField> OpCheby(Cheby,UdagU);
PlainHermOp<FermionField> Op (UdagU);
ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt);
std::vector<RealD> eval(Nm);
std::vector<FermionField> evec(Nm,rbgrid);
FermionField src(rbgrid); src = 1.0;
IRL.calc(eval,evec,src,Nconv);
FermionField tmp(rbgrid);
FermionField ftmp(grid);
FermionField ftmp4(grid4);
for(int e=0;e<evec.size();e++){
std::cout << " Full evec "<<e<<std::endl;
tmp = evec[e];
for(int s=0;s<Ls;s++){
ftmp=Zero();
setCheckerboard(ftmp,tmp);
ExtractSlice(ftmp4,ftmp,s,0);
std::cout << "s-slice "<<s<< " evec[0] " << std::endl;
DumpSliceNorm(" s-slice ",ftmp4,Nd-1);
}
}
}
Grid_finalize();
std::cout << " All done "<<std::endl;
exit(EXIT_SUCCESS);
}
}
TheHMC.Run(); // no smearing
Grid_finalize();
} // main

View File

@ -1,444 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_hmc_EODWFRatio.cc
Copyright (C) 2015-2016
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
NAMESPACE_BEGIN(Grid);
template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class SchurOperatorF>
class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
public:
typedef typename FermionOperatorD::FermionField FieldD;
typedef typename FermionOperatorF::FermionField FieldF;
using OperatorFunction<FieldD>::operator();
RealD Tolerance;
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
Integer MaxInnerIterations;
Integer MaxOuterIterations;
GridBase* SinglePrecGrid4; //Grid for single-precision fields
GridBase* SinglePrecGrid5; //Grid for single-precision fields
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
FermionOperatorF &FermOpF;
FermionOperatorD &FermOpD;;
SchurOperatorF &LinOpF;
SchurOperatorD &LinOpD;
Integer TotalInnerIterations; //Number of inner CG iterations
Integer TotalOuterIterations; //Number of restarts
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
MixedPrecisionConjugateGradientOperatorFunction(RealD tol,
Integer maxinnerit,
Integer maxouterit,
GridBase* _sp_grid4,
GridBase* _sp_grid5,
FermionOperatorF &_FermOpF,
FermionOperatorD &_FermOpD,
SchurOperatorF &_LinOpF,
SchurOperatorD &_LinOpD):
LinOpF(_LinOpF),
LinOpD(_LinOpD),
FermOpF(_FermOpF),
FermOpD(_FermOpD),
Tolerance(tol),
InnerTolerance(tol),
MaxInnerIterations(maxinnerit),
MaxOuterIterations(maxouterit),
SinglePrecGrid4(_sp_grid4),
SinglePrecGrid5(_sp_grid5),
OuterLoopNormMult(100.)
{
/* Debugging instances of objects; references are stored
std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " <<std::hex<< &LinOpF<<std::dec <<std::endl;
std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpD " <<std::hex<< &LinOpD<<std::dec <<std::endl;
std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpF " <<std::hex<< &FermOpF<<std::dec <<std::endl;
std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpD " <<std::hex<< &FermOpD<<std::dec <<std::endl;
*/
};
void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl;
SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
// std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <<std::hex<< &(SchurOpU->_Mat)<<std::dec <<std::endl;
// std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpD " <<std::hex<< &(LinOpD._Mat) <<std::dec <<std::endl;
// Assumption made in code to extract gauge field
// We could avoid storing LinopD reference alltogether ?
assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
////////////////////////////////////////////////////////////////////////////////////
// Must snarf a single precision copy of the gauge field in Linop_d argument
////////////////////////////////////////////////////////////////////////////////////
typedef typename FermionOperatorF::GaugeField GaugeFieldF;
typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF;
typedef typename FermionOperatorD::GaugeField GaugeFieldD;
typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD;
GridBase * GridPtrF = SinglePrecGrid4;
GridBase * GridPtrD = FermOpD.Umu.Grid();
GaugeFieldF U_f (GridPtrF);
GaugeLinkFieldF Umu_f(GridPtrF);
// std::cout << " Dim gauge field "<<GridPtrF->Nd()<<std::endl; // 4d
// std::cout << " Dim gauge field "<<GridPtrD->Nd()<<std::endl; // 4d
////////////////////////////////////////////////////////////////////////////////////
// Moving this to a Clone method of fermion operator would allow to duplicate the
// physics parameters and decrease gauge field copies
////////////////////////////////////////////////////////////////////////////////////
GaugeLinkFieldD Umu_d(GridPtrD);
for(int mu=0;mu<Nd*2;mu++){
Umu_d = PeekIndex<LorentzIndex>(FermOpD.Umu, mu);
precisionChange(Umu_f,Umu_d);
PokeIndex<LorentzIndex>(FermOpF.Umu, Umu_f, mu);
}
pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
////////////////////////////////////////////////////////////////////////////////////
// Make a mixed precision conjugate gradient
////////////////////////////////////////////////////////////////////////////////////
MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
MPCG(src,psi);
}
};
NAMESPACE_END(Grid);
int main(int argc, char **argv) {
using namespace Grid;
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
// Typedefs to simplify notation
typedef WilsonImplR FermionImplPolicy;
typedef WilsonImplF FermionImplPolicyF;
typedef MobiusFermionR FermionAction;
typedef MobiusFermionF FermionActionF;
typedef typename FermionAction::FermionField FermionField;
typedef typename FermionActionF::FermionField FermionFieldF;
typedef Grid::XmlReader Serialiser;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
IntegratorParameters MD;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// MD.name = std::string("Leap Frog");
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
// MD.name = std::string("Force Gradient");
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
MD.name = std::string("MinimumNorm2");
MD.MDsteps = 4;
MD.trajL = 1.0;
HMCparameters HMCparams;
HMCparams.StartTrajectory = 1077;
HMCparams.Trajectories = 1;
HMCparams.NoMetropolisUntil= 0;
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
// HMCparams.StartingType =std::string("ColdStart");
HMCparams.StartingType =std::string("CheckpointStart");
HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams);
// Grid from the command line arguments --grid and --mpi
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_DDHMC_lat";
CPparams.rng_prefix = "ckpoint_DDHMC_rng";
CPparams.saveInterval = 1;
CPparams.format = "IEEE64BIG";
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
RNGModuleParameters RNGpar;
RNGpar.serial_seeds = "1 2 3 4 5";
RNGpar.parallel_seeds = "6 7 8 9 10";
TheHMC.Resources.SetRNGSeeds(RNGpar);
// Construct observables
// here there is too much indirection
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
TheHMC.Resources.AddObservable<PlaqObs>();
//////////////////////////////////////////////
const int Ls = 12;
RealD M5 = 1.8;
RealD b = 1.5;
RealD c = 0.5;
Real beta = 2.31;
// Real light_mass = 5.4e-4;
Real light_mass = 7.8e-4;
Real strange_mass = 0.02132;
Real pv_mass = 1.0;
std::vector<Real> hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
// FIXME:
// Same in MC and MD
// Need to mix precision too
OneFlavourRationalParams SFRp; // Strange
SFRp.lo = 4.0e-3;
SFRp.hi = 90.0;
SFRp.MaxIter = 60000;
SFRp.tolerance= 1.0e-8;
SFRp.mdtolerance= 1.0e-6;
SFRp.degree = 12;
SFRp.precision= 50;
SFRp.BoundsCheckFreq=0;
OneFlavourRationalParams OFRp; // Up/down
OFRp.lo = 2.0e-5;
OFRp.hi = 90.0;
OFRp.MaxIter = 60000;
OFRp.tolerance= 1.0e-8;
OFRp.mdtolerance= 1.0e-6;
// OFRp.degree = 20; converges
// OFRp.degree = 16;
OFRp.degree = 12;
OFRp.precision= 80;
OFRp.BoundsCheckFreq=0;
auto GridPtr = TheHMC.Resources.GetCartesian();
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusFermionD,MobiusFermionF,LinearOperatorD,LinearOperatorF> MxPCG;
////////////////////////////////////////////////////////////////
// Domain decomposed
////////////////////////////////////////////////////////////////
Coordinate latt4 = GridPtr->GlobalDimensions();
Coordinate mpi = GridPtr->ProcessorGrid();
Coordinate shm;
GlobalSharedMemory::GetShmDims(mpi,shm);
Coordinate CommDim(Nd);
for(int d=0;d<Nd;d++) CommDim[d]= (mpi[d]/shm[d])>1 ? 1 : 0;
Coordinate NonDirichlet(Nd+1,0);
Coordinate Dirichlet(Nd+1,0);
Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0];
Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1];
Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2];
Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3];
Coordinate Block4(Nd);
Block4[0] = Dirichlet[1];
Block4[1] = Dirichlet[2];
Block4[2] = Dirichlet[3];
Block4[3] = Dirichlet[4];
int Width=3;
TheHMC.Resources.SetMomentumFilter(new DDHMCFilter<WilsonImplR::Field>(Block4,Width));
//////////////////////////
// Fermion Grids
//////////////////////////
auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
Coordinate simdF = GridDefaultSimd(Nd,vComplexF::Nsimd());
auto GridPtrF = SpaceTimeGrid::makeFourDimGrid(latt4,simdF,mpi);
auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF);
auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF);
auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF);
IwasakiGaugeActionR GaugeAction(beta);
// temporarily need a gauge field
LatticeGaugeField U(GridPtr);
LatticeGaugeFieldF UF(GridPtrF);
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
TheHMC.initializeGaugeFieldAndRNGs(U);
// These lines are unecessary if BC are all periodic
std::vector<Complex> boundary = {1,1,1,-1};
FermionAction::ImplParams Params(boundary);
Params.dirichlet=NonDirichlet;
FermionAction::ImplParams ParamsDir(boundary);
ParamsDir.dirichlet=Dirichlet;
// double StoppingCondition = 1e-14;
// double MDStoppingCondition = 1e-9;
double StoppingCondition = 1e-10;
double MDStoppingCondition = 1e-7;
double MDStoppingConditionLoose = 1e-6;
double MaxCGIterations = 300000;
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);
////////////////////////////////////
// Collect actions
////////////////////////////////////
ActionLevel<HMCWrapper::Field> Level1(1);
ActionLevel<HMCWrapper::Field> Level2(4);
ActionLevel<HMCWrapper::Field> Level3(8);
////////////////////////////////////
// Strange action
////////////////////////////////////
FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params);
FermionAction StrangeOpDir (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, ParamsDir);
FermionAction StrangePauliVillarsOpDir(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, ParamsDir);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionBdy(StrangeOpDir,StrangeOp,SFRp);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionLocal(StrangePauliVillarsOpDir,StrangeOpDir,SFRp);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionPVBdy(StrangePauliVillarsOp,StrangePauliVillarsOpDir,SFRp);
Level1.push_back(&StrangePseudoFermionBdy);
Level2.push_back(&StrangePseudoFermionLocal);
Level1.push_back(&StrangePseudoFermionPVBdy);
////////////////////////////////////
// up down action
////////////////////////////////////
std::vector<Real> light_den;
std::vector<Real> light_num;
std::vector<int> dirichlet_den;
std::vector<int> dirichlet_num;
int n_hasenbusch = hasenbusch.size();
light_den.push_back(light_mass); dirichlet_den.push_back(0);
for(int h=0;h<n_hasenbusch;h++){
light_den.push_back(hasenbusch[h]); dirichlet_den.push_back(1);
}
for(int h=0;h<n_hasenbusch;h++){
light_num.push_back(hasenbusch[h]); dirichlet_num.push_back(1);
}
light_num.push_back(pv_mass); dirichlet_num.push_back(0);
std::vector<FermionAction *> Numerators;
std::vector<FermionAction *> Denominators;
std::vector<FermionActionF *> DenominatorsF;
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
std::vector<OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
std::vector<MxPCG *> ActionMPCG;
std::vector<MxPCG *> MPCG;
typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
std::vector<LinearOperatorD *> LinOpD;
std::vector<LinearOperatorF *> LinOpF;
for(int h=0;h<n_hasenbusch+1;h++){
std::cout << GridLogMessage
<< " 2f quotient Action ";
std::cout << "det D("<<light_den[h]<<")";
if ( dirichlet_den[h] ) std::cout << "^dirichlet ";
std::cout << "/ det D("<<light_num[h]<<")";
if ( dirichlet_num[h] ) std::cout << "^dirichlet ";
std::cout << std::endl;
FermionAction::ImplParams ParamsNum(boundary);
FermionAction::ImplParams ParamsDen(boundary);
FermionActionF::ImplParams ParamsDenF(boundary);
if ( dirichlet_num[h]==1) ParamsNum.dirichlet = Dirichlet;
else ParamsNum.dirichlet = NonDirichlet;
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, ParamsNum));
if ( dirichlet_den[h]==1) ParamsDen.dirichlet = Dirichlet;
else ParamsDen.dirichlet = NonDirichlet;
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, ParamsDen));
ParamsDenF.dirichlet = ParamsDen.dirichlet;
DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsDenF));
LinOpD.push_back(new LinearOperatorD(*Denominators[h]));
LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h]));
double conv = MDStoppingCondition;
if (h<3) conv= MDStoppingConditionLoose; // Relax on first two hasenbusch factors
const int MX_inner = 5000;
MPCG.push_back(new MxPCG(conv,
MX_inner,
MaxCGIterations,
GridPtrF,
FrbGridF,
*DenominatorsF[h],*Denominators[h],
*LinOpF[h], *LinOpD[h]) );
ActionMPCG.push_back(new MxPCG(StoppingCondition,
MX_inner,
MaxCGIterations,
GridPtrF,
FrbGridF,
*DenominatorsF[h],*Denominators[h],
*LinOpF[h], *LinOpD[h]) );
if(h!=0) {
// Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],MDCG,CG));
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],CG));
} else {
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
}
}
int nquo=Quotients.size();
Level1.push_back(Bdys[0]);
Level1.push_back(Bdys[1]);
for(int h=0;h<nquo-1;h++){
Level2.push_back(Quotients[h]);
}
Level2.push_back(Quotients[nquo-1]);
/////////////////////////////////////////////////////////////
// Gauge action
/////////////////////////////////////////////////////////////
Level3.push_back(&GaugeAction);
TheHMC.TheAction.push_back(Level1);
TheHMC.TheAction.push_back(Level2);
TheHMC.TheAction.push_back(Level3);
std::cout << GridLogMessage << " Action complete "<< std::endl;
/////////////////////////////////////////////////////////////
TheHMC.Run(); // no smearing
Grid_finalize();
} // main

View File

@ -1,53 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file:
Copyright (C) 2015-2016
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
int main(int argc, char **argv)
{
using namespace Grid;
Grid_init(&argc, &argv);
Coordinate latt4 = GridDefaultLatt();
Coordinate mpi = GridDefaultMpi();
Coordinate simd = GridDefaultSimd(Nd,vComplexD::Nsimd());
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4,simd,mpi);
GridSerialRNG sRNG; sRNG.SeedUniqueString(std::string("The Serial RNG"));
GridParallelRNG pRNG(UGrid); pRNG.SeedUniqueString(std::string("The 4D RNG"));
std::string rngfile("ckpoint_rng.0");
NerscIO::writeRNGState(sRNG, pRNG, rngfile);
Grid_finalize();
}

View File

@ -148,7 +148,7 @@ If you want to build all the tests at once just use `make tests`.
- `--enable-mkl[=<path>]`: use Intel MKL for FFT (and LAPACK if enabled) routines. A UNIX prefix containing the library can be specified (optional).
- `--enable-numa`: enable NUMA first touch optimisation
- `--enable-simd=<code>`: setup Grid for the SIMD target `<code>` (default: `GEN`). A list of possible SIMD targets is detailed in a section below.
- `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes).
- `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 64 bytes).
- `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below.
- `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `).
- `--disable-timers`: disable system dependent high-resolution timers.

View File

@ -217,9 +217,9 @@ int main (int argc, char ** argv)
dbytes+=
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu][0],
xmit_to_rank,1,
xmit_to_rank,
(void *)&rbuf[mu][0],
recv_from_rank,1,
recv_from_rank,
bytes,mu);
comm_proc = mpi_layout[mu]-1;
@ -228,9 +228,9 @@ int main (int argc, char ** argv)
dbytes+=
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu+4][0],
xmit_to_rank,1,
xmit_to_rank,
(void *)&rbuf[mu+4][0],
recv_from_rank,1,
recv_from_rank,
bytes,mu+4);
}
@ -309,9 +309,9 @@ int main (int argc, char ** argv)
dbytes+=
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu][0],
xmit_to_rank,1,
xmit_to_rank,
(void *)&rbuf[mu][0],
recv_from_rank,1,
recv_from_rank,
bytes,mu);
Grid.StencilSendToRecvFromComplete(requests,mu);
requests.resize(0);
@ -322,9 +322,9 @@ int main (int argc, char ** argv)
dbytes+=
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu+4][0],
xmit_to_rank,1,
xmit_to_rank,
(void *)&rbuf[mu+4][0],
recv_from_rank,1,
recv_from_rank,
bytes,mu+4);
Grid.StencilSendToRecvFromComplete(requests,mu+4);
requests.resize(0);
@ -411,8 +411,8 @@ int main (int argc, char ** argv)
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
}
int tid = omp_get_thread_num();
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,1,
(void *)&rbuf[dir][0], recv_from_rank,1, bytes,tid);
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
(void *)&rbuf[dir][0], recv_from_rank, bytes,tid);
thread_critical { dbytes+=tbytes; }
}

View File

@ -191,7 +191,9 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
Dw.Dhop(src,result,0);
__SSC_STOP;
}
double t1=usecond();
FGrid->Barrier();

Some files were not shown because too many files have changed in this diff Show More