mirror of https://github.com/paboyle/Grid.git synced 2025-06-22 17:52:02 +01:00

Compare commits


30 Commits

Author SHA1 Message Date
d3496d2fe0 Merge pull request #397 from giltirn/feature/dirichlet-gparity-stage
Gparity HMC import round 2
2022-05-25 13:29:45 -04:00
6121397587 Imported changes from feature/gparity_HMC branch:
Added storage of final true residual in mixed-prec CG and enhanced log output
	Fixed const correctness of multi-shift constructor
	Added a mixed precision variant of the multi-shift algorithm that uses a single precision operator and applies periodic reliable update to the residual
	Added tests/solver/Test_dwf_multishift_mixedprec to test the above
	Fixed local coherence lanczos using the (large!) max approx to the chebyshev eval as the scale from which to judge the quality of convergence, resulting in a test that always passes
	Added a method to local coherence lanczos class that returns the fine eval/evec pair
	Added iterative log output to power method
	Added optional disabling of the plaquette check in NerscIO to support loading old G-parity configs which have a factor of 2 error in the plaquette
	G-parity Dirac op no longer allows GPBC in the time direction; instead we toggle between periodic and antiperiodic
	Replaced thread_for G-parity 5D force insertion implementation with accelerator_for version capable of running on GPUs
	Generalized tests/lanczos/Test_dwf_lanczos to support regular DWF as well as Gparity, with the action chosen by a command line option
	Modified tests/forces/Test_dwf_gpforce,Test_gpdwf_force,Test_gpwilson_force to use GPBC in a spatial direction rather than the t-direction, and antiperiodic BCs for the time direction
	tests/core/Test_gparity now supports using APBC in time direction using command line toggle
2022-05-09 16:27:57 -04:00
0417b96896 Merge pull request #391 from giltirn/feature/dirichlet-gparity-stage
First stage of import
2022-05-03 08:50:18 -04:00
81fe4c937e Hopefully fix link errors on Intel compilers due to having no function body for MomentumFilterBase::apply_phase 2022-04-12 09:51:59 -04:00
f77f3a6598 Imported G-parity flavor algebra + tester from feature/gparity_HMC branch 2022-04-06 10:21:04 -04:00
239afb18fb Merge branch 'feature/dirichlet' into feature/dirichlet-gparity 2022-04-05 16:49:32 -04:00
ef820a26cd Bcopy on crusher compile 2022-04-05 16:49:02 -04:00
65abe4d0d3 Merge branch 'feature/dirichlet' into feature/dirichlet-gparity 2022-04-05 16:26:54 -04:00
5012adfebf Merge branch 'develop' into feature/dirichlet 2022-04-05 16:26:19 -04:00
b808d48fa1 Tone down printing in integrator 2022-04-05 16:25:22 -04:00
83f818a99d Updates for DDHMC 2022-04-05 16:24:34 -04:00
387397374a Current run options 2022-03-23 16:35:11 -04:00
605cf401e1 Merge branch 'feature/sumd-npr' into develop 2022-03-16 22:43:12 +00:00
f99c3660d2 Merge branch 'feature/cpu-threaded-smp' into develop 2022-03-16 22:07:54 +00:00
92a83a9eb3 Performance improvement for Tesseract 2022-03-16 17:14:36 +00:00
bb5c16b97f New scripts 2022-03-03 17:00:37 -05:00
0d80eeb545 small DDHMC update 2022-03-03 16:56:02 -05:00
b0f4eee78b New files 2022-03-01 19:09:13 -05:00
5340e50427 HMC running with new formulation 2022-03-01 17:10:25 -05:00
e16fc5b2e4 Threaded intranode comms transfer - ideally between NUMA domains 2022-03-01 11:17:24 -05:00
694306f202 Configure for mac arm 2022-03-01 10:53:44 -05:00
0f1c5b08a1 Dirichlet filters running on AMD and now integrated in Fermion op 2022-02-23 19:29:28 -05:00
70988e43d2 Passes multinode dirichlet test with boundaries at
node boundary or at the single rank boundary
2022-02-23 01:42:14 -05:00
aab3bcb46f Dirichlet first cut - wrong answers on dagger multiply.
Struggling to get a compute node so changing systems
2022-02-22 19:58:33 +00:00
da06d15f73 Merge branch 'feature/feature/staggered-comms' into develop 2022-02-17 04:58:50 +00:00
e8b1251b8c Staggered fix finished 2022-02-17 04:51:13 +00:00
fad5a74a4b Bug fix to detection case 2022-02-15 10:27:39 -05:00
e83f6a6ae9 Merge branch 'develop' into feature/feature/staggered-comms 2022-02-15 08:52:39 -05:00
6283d11d50 Add the comment line to tell the existence of copied data/buffer 2022-02-08 15:22:06 +00:00
6616d5d090 Commit 2022-02-02 16:38:24 -05:00
72 changed files with 3826 additions and 637 deletions

View File

@ -36,6 +36,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/GridCore.h>
#include <Grid/qcd/QCD.h>
#include <Grid/qcd/spin/Spin.h>
#include <Grid/qcd/gparity/Gparity.h>
#include <Grid/qcd/utils/Utils.h>
#include <Grid/qcd/representations/Representations.h>
NAMESPACE_CHECK(GridQCDCore);

View File

@ -16,6 +16,7 @@
#include <functional>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <stdio.h>
#include <signal.h>
#include <ctime>

View File

@ -54,6 +54,7 @@ NAMESPACE_CHECK(BiCGSTAB);
#include <Grid/algorithms/iterative/SchurRedBlack.h>
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
#include <Grid/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h>
#include <Grid/algorithms/iterative/BiCGSTABMixedPrec.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>

View File

@ -49,6 +49,7 @@ NAMESPACE_BEGIN(Grid);
Integer TotalInnerIterations; //Number of inner CG iterations
Integer TotalOuterIterations; //Number of restarts
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
RealD TrueResidual;
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
LinearFunction<FieldF> *guesser;
@ -68,6 +69,7 @@ NAMESPACE_BEGIN(Grid);
}
void operator() (const FieldD &src_d_in, FieldD &sol_d){
std::cout << GridLogMessage << "MixedPrecisionConjugateGradient: Starting mixed precision CG with outer tolerance " << Tolerance << " and inner tolerance " << InnerTolerance << std::endl;
TotalInnerIterations = 0;
GridStopWatch TotalTimer;
@ -97,6 +99,7 @@ NAMESPACE_BEGIN(Grid);
FieldF sol_f(SinglePrecGrid);
sol_f.Checkerboard() = cb;
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting initial inner CG with tolerance " << inner_tol << std::endl;
ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations);
CG_f.ErrorOnNoConverge = false;
@ -130,6 +133,7 @@ NAMESPACE_BEGIN(Grid);
(*guesser)(src_f, sol_f);
//Inner CG
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " << outer_iter << " starting inner CG with tolerance " << inner_tol << std::endl;
CG_f.Tolerance = inner_tol;
InnerCGtimer.Start();
CG_f(Linop_f, src_f, sol_f);
@ -150,6 +154,7 @@ NAMESPACE_BEGIN(Grid);
ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations);
CG_d(Linop_d, src_d_in, sol_d);
TotalFinalStepIterations = CG_d.IterationsToComplete;
TrueResidual = CG_d.TrueResidual;
TotalTimer.Stop();
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl;
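
The new TrueResidual member exposes the residual achieved by the final double-precision clean-up solve. A minimal usage sketch, assuming the double/single precision Hermitian operators (HermOpD/HermOpF), the single-precision grid and the source/solution fields are already constructed; all of those names are placeholders for that setup:

MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF>
  mpcg(1.0e-10, 10000, 50, SinglePrecGrid, HermOpF, HermOpD);
mpcg(src_d, sol_d);                                          // mixed-precision solve
std::cout << GridLogMessage << "Final true residual " << mpcg.TrueResidual << std::endl;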

View File

@ -52,7 +52,7 @@ public:
MultiShiftFunction shifts;
std::vector<RealD> TrueResidualShift;
ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) :
ConjugateGradientMultiShift(Integer maxit, const MultiShiftFunction &_shifts) :
MaxIterations(maxit),
shifts(_shifts)
{
@ -183,6 +183,9 @@ public:
axpby(psi[s],0.,-bs[s]*alpha[s],src,src);
}
std::cout << GridLogIterative << "ConjugateGradientMultiShift: initial rn (|src|^2) =" << rn << " qq (|MdagM src|^2) =" << qq << " d ( dot(src, [MdagM + m_0]src) ) =" << d << " c=" << c << std::endl;
///////////////////////////////////////
// Timers
///////////////////////////////////////

View File

@ -0,0 +1,409 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShift.h
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Christopher Kelly <ckelly@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H
#define GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H
NAMESPACE_BEGIN(Grid);
//CK 2020: A variant of the multi-shift conjugate gradient with the matrix multiplication in single precision.
//The residual is stored in single precision, but the search directions and solution are stored in double precision.
//Every update_freq iterations the residual is corrected in double precision.
//For safety a final regular CG is applied to clean up if necessary
//Linop to add shift to input linop, used in cleanup CG
namespace ConjugateGradientMultiShiftMixedPrecSupport{
template<typename Field>
class ShiftedLinop: public LinearOperatorBase<Field>{
public:
LinearOperatorBase<Field> &linop_base;
RealD shift;
ShiftedLinop(LinearOperatorBase<Field> &_linop_base, RealD _shift): linop_base(_linop_base), shift(_shift){}
void OpDiag (const Field &in, Field &out){ assert(0); }
void OpDir (const Field &in, Field &out,int dir,int disp){ assert(0); }
void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); }
void Op (const Field &in, Field &out){ assert(0); }
void AdjOp (const Field &in, Field &out){ assert(0); }
void HermOp(const Field &in, Field &out){
linop_base.HermOp(in, out);
axpy(out, shift, in, out);
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
HermOp(in,out);
ComplexD dot = innerProduct(in,out);
n1=real(dot);
n2=norm2(out);
}
};
};
template<class FieldD, class FieldF,
typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
class ConjugateGradientMultiShiftMixedPrec : public OperatorMultiFunction<FieldD>,
public OperatorFunction<FieldD>
{
public:
using OperatorFunction<FieldD>::operator();
RealD Tolerance;
Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
std::vector<int> IterationsToCompleteShift; // Iterations for this shift
int verbose;
MultiShiftFunction shifts;
std::vector<RealD> TrueResidualShift;
int ReliableUpdateFreq; //number of iterations between reliable updates
GridBase* SinglePrecGrid; //Grid for single-precision fields
LinearOperatorBase<FieldF> &Linop_f; //single precision
ConjugateGradientMultiShiftMixedPrec(Integer maxit, const MultiShiftFunction &_shifts,
GridBase* _SinglePrecGrid, LinearOperatorBase<FieldF> &_Linop_f,
int _ReliableUpdateFreq
) :
MaxIterations(maxit), shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq)
{
verbose=1;
IterationsToCompleteShift.resize(_shifts.order);
TrueResidualShift.resize(_shifts.order);
}
void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, FieldD &psi)
{
GridBase *grid = src.Grid();
int nshift = shifts.order;
std::vector<FieldD> results(nshift,grid);
(*this)(Linop,src,results,psi);
}
void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, std::vector<FieldD> &results, FieldD &psi)
{
int nshift = shifts.order;
(*this)(Linop,src,results);
psi = shifts.norm*src;
for(int i=0;i<nshift;i++){
psi = psi + shifts.residues[i]*results[i];
}
return;
}
void operator() (LinearOperatorBase<FieldD> &Linop_d, const FieldD &src_d, std::vector<FieldD> &psi_d)
{
GridBase *DoublePrecGrid = src_d.Grid();
////////////////////////////////////////////////////////////////////////
// Convenience references to the info stored in "MultiShiftFunction"
////////////////////////////////////////////////////////////////////////
int nshift = shifts.order;
std::vector<RealD> &mass(shifts.poles); // Make references to array in "shifts"
std::vector<RealD> &mresidual(shifts.tolerances);
std::vector<RealD> alpha(nshift,1.0);
//Double precision search directions
FieldD p_d(DoublePrecGrid);
std::vector<FieldD> ps_d(nshift, DoublePrecGrid);// Search directions (double precision)
FieldD tmp_d(DoublePrecGrid);
FieldD r_d(DoublePrecGrid);
FieldD mmp_d(DoublePrecGrid);
assert(psi_d.size()==nshift);
assert(mass.size()==nshift);
assert(mresidual.size()==nshift);
// dynamic sized arrays on stack; 2d is a pain with vector
RealD bs[nshift];
RealD rsq[nshift];
RealD z[nshift][2];
int converged[nshift];
const int primary =0;
//Primary shift fields CG iteration
RealD a,b,c,d;
RealD cp,bp,qq; //prev
// Matrix mult fields
FieldF r_f(SinglePrecGrid);
FieldF p_f(SinglePrecGrid);
FieldF tmp_f(SinglePrecGrid);
FieldF mmp_f(SinglePrecGrid);
FieldF src_f(SinglePrecGrid);
precisionChange(src_f, src_d);
// Check lightest mass
for(int s=0;s<nshift;s++){
assert( mass[s]>= mass[primary] );
converged[s]=0;
}
// Wire guess to zero
// Residuals "r" are src
// First search direction "p" is also src
cp = norm2(src_d);
// Handle trivial case of zero src.
if( cp == 0. ){
for(int s=0;s<nshift;s++){
psi_d[s] = Zero();
IterationsToCompleteShift[s] = 1;
TrueResidualShift[s] = 0.;
}
return;
}
for(int s=0;s<nshift;s++){
rsq[s] = cp * mresidual[s] * mresidual[s];
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: shift "<< s <<" target resid "<<rsq[s]<<std::endl;
ps_d[s] = src_d;
}
// r and p for primary
r_f=src_f; //residual maintained in single
p_f=src_f;
p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys
//MdagM+m[0]
Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
axpy(mmp_f,mass[0],p_f,mmp_f);
RealD rn = norm2(p_f);
d += rn*mass[0];
b = -cp /d;
// Set up the various shift variables
int iz=0;
z[0][1-iz] = 1.0;
z[0][iz] = 1.0;
bs[0] = b;
for(int s=1;s<nshift;s++){
z[s][1-iz] = 1.0;
z[s][iz] = 1.0/( 1.0 - b*(mass[s]-mass[0]));
bs[s] = b*z[s][iz];
}
// r += b[0] A.p[0]
// c= norm(r)
c=axpy_norm(r_f,b,mmp_f,r_f);
for(int s=0;s<nshift;s++) {
axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d);
}
///////////////////////////////////////
// Timers
///////////////////////////////////////
GridStopWatch AXPYTimer, ShiftTimer, QRTimer, MatrixTimer, SolverTimer, PrecChangeTimer, CleanupTimer;
SolverTimer.Start();
// Iteration loop
int k;
for (k=1;k<=MaxIterations;k++){
a = c /cp;
//Update double precision search direction by residual
PrecChangeTimer.Start();
precisionChange(r_d, r_f);
PrecChangeTimer.Stop();
AXPYTimer.Start();
axpy(p_d,a,p_d,r_d);
for(int s=0;s<nshift;s++){
if ( ! converged[s] ) {
if (s==0){
axpy(ps_d[s],a,ps_d[s],r_d);
} else{
RealD as =a *z[s][iz]*bs[s] /(z[s][1-iz]*b);
axpby(ps_d[s],z[s][iz],as,r_d,ps_d[s]);
}
}
}
AXPYTimer.Stop();
PrecChangeTimer.Start();
precisionChange(p_f, p_d); //get back single prec search direction for linop
PrecChangeTimer.Stop();
cp=c;
MatrixTimer.Start();
Linop_f.HermOp(p_f,mmp_f);
d=real(innerProduct(p_f,mmp_f));
MatrixTimer.Stop();
AXPYTimer.Start();
axpy(mmp_f,mass[0],p_f,mmp_f);
AXPYTimer.Stop();
RealD rn = norm2(p_f);
d += rn*mass[0];
bp=b;
b=-cp/d;
// Toggle the recurrence history
bs[0] = b;
iz = 1-iz;
ShiftTimer.Start();
for(int s=1;s<nshift;s++){
if((!converged[s])){
RealD z0 = z[s][1-iz];
RealD z1 = z[s][iz];
z[s][iz] = z0*z1*bp
/ (b*a*(z1-z0) + z1*bp*(1- (mass[s]-mass[0])*b));
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
}
}
ShiftTimer.Stop();
//Update double precision solutions
AXPYTimer.Start();
for(int s=0;s<nshift;s++){
int ss = s;
if( (!converged[s]) ) {
axpy(psi_d[ss],-bs[s]*alpha[s],ps_d[s],psi_d[ss]);
}
}
//Perform reliable update if necessary; otherwise update residual from single-prec mmp
RealD c_f = axpy_norm(r_f,b,mmp_f,r_f);
AXPYTimer.Stop();
c = c_f;
if(k % ReliableUpdateFreq == 0){
//Replace r with true residual
MatrixTimer.Start();
Linop_d.HermOp(psi_d[0],mmp_d);
MatrixTimer.Stop();
AXPYTimer.Start();
axpy(mmp_d,mass[0],psi_d[0],mmp_d);
RealD c_d = axpy_norm(r_d, -1.0, mmp_d, src_d);
AXPYTimer.Stop();
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec k="<<k<< ", replaced |r|^2 = "<<c_f <<" with |r|^2 = "<<c_d<<std::endl;
PrecChangeTimer.Start();
precisionChange(r_f, r_d);
PrecChangeTimer.Stop();
c = c_d;
}
// Convergence checks
int all_converged = 1;
for(int s=0;s<nshift;s++){
if ( (!converged[s]) ){
IterationsToCompleteShift[s] = k;
RealD css = c * z[s][iz]* z[s][iz];
if(css<rsq[s]){
if ( ! converged[s] )
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
converged[s]=1;
} else {
all_converged=0;
}
}
}
if ( all_converged ){
SolverTimer.Stop();
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: All shifts have converged iteration "<<k<<std::endl;
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: Checking solutions"<<std::endl;
// Check answers
for(int s=0; s < nshift; s++) {
Linop_d.HermOpAndNorm(psi_d[s],mmp_d,d,qq);
axpy(tmp_d,mass[s],psi_d[s],mmp_d);
axpy(r_d,-alpha[s],src_d,tmp_d);
RealD rn = norm2(r_d);
RealD cn = norm2(src_d);
TrueResidualShift[s] = std::sqrt(rn/cn);
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: shift["<<s<<"] true residual "<< TrueResidualShift[s] << " target " << mresidual[s] << std::endl;
//If we have not reached the desired tolerance, do a (mixed precision) CG cleanup
if(rn >= rsq[s]){
CleanupTimer.Start();
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: performing cleanup step for shift " << s << std::endl;
//Setup linear operators for final cleanup
ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldD> Linop_shift_d(Linop_d, mass[s]);
ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldF> Linop_shift_f(Linop_f, mass[s]);
MixedPrecisionConjugateGradient<FieldD,FieldF> cg(mresidual[s], MaxIterations, MaxIterations, SinglePrecGrid, Linop_shift_f, Linop_shift_d);
cg(src_d, psi_d[s]);
TrueResidualShift[s] = cg.TrueResidual;
CleanupTimer.Stop();
}
}
std::cout << GridLogMessage << "ConjugateGradientMultiShiftMixedPrec: Time Breakdown for body"<<std::endl;
std::cout << GridLogMessage << "\tSolver " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\t\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\t\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\t\tShift " << ShiftTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\t\tPrecision Change " << PrecChangeTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tFinal Cleanup " << CleanupTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tSolver+Cleanup " << SolverTimer.Elapsed() + CleanupTimer.Elapsed() << std::endl;
IterationsToComplete = k;
return;
}
}
// ugly hack
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
// assert(0);
}
};
NAMESPACE_END(Grid);
#endif
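
A minimal usage sketch of the mixed-precision multi-shift solver defined above, assuming a MultiShiftFunction 'shifts', single/double precision Hermitian operators (HermOpF/HermOpD) and the corresponding grids already exist; those names are placeholders:

ConjugateGradientMultiShiftMixedPrec<LatticeFermionD,LatticeFermionF>
  MSCG(10000, shifts, SinglePrecGrid, HermOpF, /*ReliableUpdateFreq*/ 50);
std::vector<LatticeFermionD> psi(shifts.order, DoublePrecGrid);   // one solution per pole
MSCG(HermOpD, src_d, psi);   // the clean-up CG runs automatically for any shift missing its tolerance
for(int s=0; s<shifts.order; s++)
  std::cout << GridLogMessage << "shift " << s << " true residual "
            << MSCG.TrueResidualShift[s] << std::endl;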

View File

@ -44,6 +44,7 @@ public:
int, MinRes); // Must restart
};
//This class is the input parameter class for some testing programs
struct LocalCoherenceLanczosParams : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
@ -155,6 +156,7 @@ public:
_coarse_relax_tol(coarse_relax_tol)
{ };
//evalMaxApprox: approximation of largest eval of the fine Chebyshev operator (suitably wrapped by block projection)
int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
{
CoarseField v(B);
@ -181,8 +183,16 @@ public:
if( (vv<eresid*eresid) ) conv = 1;
return conv;
}
//This function is called at the end of the coarse grid Lanczos. It promotes the coarse eigenvector 'B' to the fine grid,
//applies a smoother to the result, then computes the *fine grid* eigenvalue (output as 'eval').
//evalMaxApprox should be the approximation of the largest eval of the fine Hermop. However when this function is called by IRL it actually passes the largest eval of the *Chebyshev* operator (as this is the max approx used for the TestConvergence above)
//As the largest eval of the Chebyshev is typically several orders of magnitude larger this makes the convergence test pass even when it should not.
//We therefore ignore evalMaxApprox here and use a value of 1.0 (note this value is already used by TestCoarse)
int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
{
evalMaxApprox = 1.0; //cf above
GridBase *FineGrid = _subspace[0].Grid();
int checkerboard = _subspace[0].Checkerboard();
FineField fB(FineGrid);fB.Checkerboard() =checkerboard;
@ -201,13 +211,13 @@ public:
eval = vnum/vden;
fv -= eval*fB;
RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0);
if ( j > nbasis ) eresid = eresid*_coarse_relax_tol;
std::cout.precision(13);
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv << " target " << eresid*eresid
<<std::endl;
if ( j > nbasis ) eresid = eresid*_coarse_relax_tol;
if( (vv<eresid*eresid) ) return 1;
return 0;
}
@ -285,6 +295,10 @@ public:
evals_coarse.resize(0);
};
//The block inner product is the inner product on the fine grid locally summed over the blocks
//to give a Lattice<Scalar> on the coarse grid. This function orthonormalizes the fine-grid subspace
//vectors under the block inner product. This step must be performed after computing the fine grid
//eigenvectors and before computing the coarse grid eigenvectors.
void Orthogonalise(void ) {
CoarseScalar InnerProd(_CoarseGrid);
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
@ -328,6 +342,8 @@ public:
}
}
//While this method serves to check the coarse eigenvectors, it also recomputes the eigenvalues from the smoothed reconstructed eigenvectors
//hence the smoother can be tuned after running the coarse Lanczos by using a different smoother here
void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)
{
assert(evals_fine.size() == nbasis);
@ -376,18 +392,23 @@ public:
evals_fine.resize(nbasis);
subspace.resize(nbasis,_FineGrid);
}
//cheby_op: Parameters of the fine grid Chebyshev polynomial used for the Lanczos acceleration
//cheby_smooth: Parameters of a separate Chebyshev polynomial used after the Lanczos has completed to smooth out high frequency noise in the reconstructed fine grid eigenvectors prior to computing the eigenvalue
//relax: Reconstructed eigenvectors (post smoothing) are naturally not as precise as true eigenvectors. This factor acts as a multiplier on the stopping condition when determining whether the results satisfy the user provided stopping condition
void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
int Nstop, int Nk, int Nm,RealD resid,
RealD MaxIt, RealD betastp, int MinRes)
{
Chebyshev<FineField> Cheby(cheby_op);
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,subspace);
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace);
Chebyshev<FineField> Cheby(cheby_op); //Chebyshev of fine operator on fine grid
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,subspace); //Fine operator on coarse grid with intermediate fine grid conversion
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace); //Chebyshev of fine operator on coarse grid with intermediate fine grid conversion
//////////////////////////////////////////////////////////////////////////////////////////////////
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
//////////////////////////////////////////////////////////////////////////////////////////////////
Chebyshev<FineField> ChebySmooth(cheby_smooth);
Chebyshev<FineField> ChebySmooth(cheby_smooth); //lower order Chebyshev of fine operator on fine grid used to smooth regenerated eigenvectors
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
evals_coarse.resize(Nm);
@ -395,6 +416,7 @@ public:
CoarseField src(_CoarseGrid); src=1.0;
//Note the "tester" here is also responsible for generating the fine grid eigenvalues which are output into the "evals_coarse" array
ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
int Nconv=0;
IRL.calc(evals_coarse,evec_coarse,src,Nconv,false);
@ -405,6 +427,14 @@ public:
std::cout << i << " Coarse eval = " << evals_coarse[i] << std::endl;
}
}
//Get the fine eigenvector 'i' by reconstruction
void getFineEvecEval(FineField &evec, RealD &eval, const int i) const{
blockPromote(evec_coarse[i],evec,subspace);
eval = evals_coarse[i];
}
};
NAMESPACE_END(Grid);
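
A sketch of the new accessor, assuming a LocalCoherenceLanczos object 'LCL' whose coarse Lanczos has already been run; the field type and grid are placeholders for the concrete instantiation:

LatticeFermionD evec(FineGrid);       // the FineField type of the instantiation
RealD eval;
LCL.getFineEvecEval(evec, eval, 0);   // promote coarse evec 0 to the fine grid and return its eval
std::cout << GridLogMessage << "fine eval[0] = " << eval << std::endl;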

View File

@ -30,6 +30,8 @@ template<class Field> class PowerMethod
RealD vden = norm2(src_n);
RealD na = vnum/vden;
std::cout << GridLogIterative << "PowerMethod: Current approximation of largest eigenvalue " << na << std::endl;
if ( (fabs(evalMaxApprox/na - 1.0) < 0.001) || (i==_MAX_ITER_EST_-1) ) {
evalMaxApprox = na;
std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;

View File

@ -53,10 +53,11 @@ public:
// Communicator should know nothing of the physics grid, only processor grid.
////////////////////////////////////////////
int _Nprocessors; // How many in all
Coordinate _processors; // Which dimensions get relayed out over processors lanes.
int _processor; // linear processor rank
Coordinate _processor_coor; // linear processor coordinate
unsigned long _ndimension;
Coordinate _shm_processors; // Which dimensions get relayed out over processors lanes.
Coordinate _processors; // Which dimensions get relayed out over processors lanes.
Coordinate _processor_coor; // linear processor coordinate
static Grid_MPI_Comm communicator_world;
Grid_MPI_Comm communicator;
std::vector<Grid_MPI_Comm> communicator_halo;
@ -97,6 +98,7 @@ public:
int BossRank(void) ;
int ThisRank(void) ;
const Coordinate & ThisProcessorCoor(void) ;
const Coordinate & ShmGrid(void) { return _shm_processors; } ;
const Coordinate & ProcessorGrid(void) ;
int ProcessorCount(void) ;
@ -142,16 +144,16 @@ public:
int bytes);
double StencilSendToRecvFrom(void *xmit,
int xmit_to_rank,
int xmit_to_rank,int do_xmit,
void *recv,
int recv_from_rank,
int recv_from_rank,int do_recv,
int bytes,int dir);
double StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
int xmit_to_rank,int do_xmit,
void *recv,
int recv_from_rank,
int recv_from_rank,int do_recv,
int bytes,int dir);

View File

@ -106,7 +106,7 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors)
// Remap using the shared memory optimising routine
// The remap creates a comm which must be freed
////////////////////////////////////////////////////
GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm);
GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm,_shm_processors);
InitFromMPICommunicator(processors,optimal_comm);
SetCommunicator(optimal_comm);
///////////////////////////////////////////////////
@ -124,12 +124,13 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const
int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
Coordinate parent_processor_coor(_ndimension,0);
Coordinate parent_processors (_ndimension,1);
Coordinate shm_processors (_ndimension,1);
// Can make 5d grid from 4d etc...
int pad = _ndimension-parent_ndimension;
for(int d=0;d<parent_ndimension;d++){
parent_processor_coor[pad+d]=parent._processor_coor[d];
parent_processors [pad+d]=parent._processors[d];
shm_processors [pad+d]=parent._shm_processors[d];
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
@ -154,6 +155,7 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const
ccoor[d] = parent_processor_coor[d] % processors[d];
scoor[d] = parent_processor_coor[d] / processors[d];
ssize[d] = parent_processors[d] / processors[d];
if ( processors[d] < shm_processors[d] ) shm_processors[d] = processors[d]; // subnode splitting.
}
// rank within subcomm ; srank is rank of subcomm within blocks of subcomms
@ -335,22 +337,22 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
}
// Basic Halo comms primitive
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
int dest,
int dest, int dox,
void *recv,
int from,
int from, int dor,
int bytes,int dir)
{
std::vector<CommsRequest_t> list;
double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir);
double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,dox,recv,from,dor,bytes,dir);
StencilSendToRecvFromComplete(list,dir);
return offbytes;
}
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
int dest,int dox,
void *recv,
int from,
int from,int dor,
int bytes,int dir)
{
int ncomm =communicator_halo.size();
@ -370,6 +372,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
double off_node_bytes=0.0;
int tag;
if ( dox ) {
if ( (gfrom ==MPI_UNDEFINED) || Stencil_force_mpi ) {
tag= dir+from*32;
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq);
@ -377,7 +380,9 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
list.push_back(rrq);
off_node_bytes+=bytes;
}
}
if (dor) {
if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) {
tag= dir+_processor*32;
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq);
@ -391,6 +396,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
// std::cout <<"acceleratorCopyDeviceToDeviceAsynch"<< std::endl;
acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes);
}
}
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
this->StencilSendToRecvFromComplete(list,dir);

View File

@ -45,12 +45,14 @@ void CartesianCommunicator::Init(int *argc, char *** arv)
CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const CartesianCommunicator &parent,int &srank)
: CartesianCommunicator(processors)
{
_shm_processors = Coordinate(processors.size(),1);
srank=0;
SetCommunicator(communicator_world);
}
CartesianCommunicator::CartesianCommunicator(const Coordinate &processors)
{
_shm_processors = Coordinate(processors.size(),1);
_processors = processors;
_ndimension = processors.size(); assert(_ndimension>=1);
_processor_coor.resize(_ndimension);
@ -111,18 +113,18 @@ void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest
}
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
int xmit_to_rank,
int xmit_to_rank,int dox,
void *recv,
int recv_from_rank,
int recv_from_rank,int dor,
int bytes, int dir)
{
return 2.0*bytes;
}
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
int xmit_to_rank,int dox,
void *recv,
int recv_from_rank,
int recv_from_rank,int dor,
int bytes, int dir)
{
return 2.0*bytes;

View File

@ -93,9 +93,10 @@ public:
// Create an optimal reordered communicator that makes MPI_Cart_create get it right
//////////////////////////////////////////////////////////////////////////////////////
static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD
static void OptimalCommunicator (const Coordinate &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian
static void OptimalCommunicatorHypercube (const Coordinate &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian
static void OptimalCommunicatorSharedMemory(const Coordinate &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian
// Turns MPI_COMM_WORLD into right layout for Cartesian
static void OptimalCommunicator (const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &ShmDims);
static void OptimalCommunicatorHypercube (const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &ShmDims);
static void OptimalCommunicatorSharedMemory(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &ShmDims);
static void GetShmDims(const Coordinate &WorldDims,Coordinate &ShmDims);
///////////////////////////////////////////////////
// Provide shared memory facilities off comm world

View File

@ -152,7 +152,7 @@ int Log2Size(int TwoToPower,int MAXLOG2)
}
return log2size;
}
void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_MPI_Comm & optimal_comm)
void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &SHM)
{
//////////////////////////////////////////////////////////////////////////////
// Look and see if it looks like an HPE 8600 based on hostname conventions
@ -165,8 +165,8 @@ void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_M
gethostname(name,namelen);
int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
if(nscan==3 && HPEhypercube ) OptimalCommunicatorHypercube(processors,optimal_comm);
else OptimalCommunicatorSharedMemory(processors,optimal_comm);
if(nscan==3 && HPEhypercube ) OptimalCommunicatorHypercube(processors,optimal_comm,SHM);
else OptimalCommunicatorSharedMemory(processors,optimal_comm,SHM);
}
static inline int divides(int a,int b)
{
@ -221,7 +221,7 @@ void GlobalSharedMemory::GetShmDims(const Coordinate &WorldDims,Coordinate &ShmD
dim=(dim+1) %ndimension;
}
}
void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processors,Grid_MPI_Comm & optimal_comm)
void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &SHM)
{
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
@ -294,6 +294,7 @@ void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processo
Coordinate HyperCoor(ndimension);
GetShmDims(WorldDims,ShmDims);
SHM = ShmDims;
////////////////////////////////////////////////////////////////
// Establish torus of processes and nodes with sub-blockings
@ -341,7 +342,7 @@ void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processo
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
assert(ierr==0);
}
void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const Coordinate &processors,Grid_MPI_Comm & optimal_comm)
void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &SHM)
{
////////////////////////////////////////////////////////////////
// Identify subblock of ranks on node spreading across dims
@ -353,6 +354,8 @@ void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const Coordinate &proce
Coordinate ShmCoor(ndimension); Coordinate NodeCoor(ndimension); Coordinate WorldCoor(ndimension);
GetShmDims(WorldDims,ShmDims);
SHM=ShmDims;
////////////////////////////////////////////////////////////////
// Establish torus of processes and nodes with sub-blockings
////////////////////////////////////////////////////////////////

View File

@ -48,9 +48,10 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
_ShmSetup=1;
}
void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_MPI_Comm & optimal_comm)
void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &SHM)
{
optimal_comm = WorldComm;
SHM = Coordinate(processors.size(),1);
}
////////////////////////////////////////////////////////////////////////////////////////////

View File

@ -46,3 +46,4 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/lattice/Lattice_unary.h>
#include <Grid/lattice/Lattice_transfer.h>
#include <Grid/lattice/Lattice_basis.h>
#include <Grid/lattice/Lattice_crc.h>

View File

@ -0,0 +1,55 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/lattice/Lattice_crc.h
Copyright (C) 2021
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
template<class vobj> void DumpSliceNorm(std::string s,Lattice<vobj> &f,int mu=-1)
{
auto ff = localNorm2(f);
if ( mu==-1 ) mu = f.Grid()->Nd()-1;
typedef typename vobj::tensor_reduced normtype;
typedef typename normtype::scalar_object scalar;
std::vector<scalar> sff;
sliceSum(ff,sff,mu);
for(int t=0;t<sff.size();t++){
std::cout << s<<" "<<t<<" "<<sff[t]<<std::endl;
}
}
template<class vobj> uint32_t crc(Lattice<vobj> & buf)
{
autoView( buf_v , buf, CpuRead);
return ::crc32(0L,(unsigned char *)&buf_v[0],(size_t)sizeof(vobj)*buf.oSites());
}
#define CRC(U) std::cout << "FingerPrint "<<__FILE__ <<" "<< __LINE__ <<" "<< #U <<" "<<crc(U)<<std::endl;
NAMESPACE_END(Grid);
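
A sketch of the new debugging helpers, assuming a gauge field 'Umu' already holds data (crc() calls zlib's ::crc32):

CRC(Umu);                    // prints "FingerPrint <file> <line> Umu <checksum>"
DumpSliceNorm("Umu", Umu);   // per-slice norm2, summed in the last (time) direction by default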

View File

@ -69,6 +69,7 @@ GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE");
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE");
GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE");
GridLogger GridLogHMC (1, "HMC", GridLogColours, "BLUE");
void GridLogConfigure(std::vector<std::string> &logstreams) {
GridLogError.Active(0);
@ -79,6 +80,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
GridLogPerformance.Active(0);
GridLogIntegrator.Active(1);
GridLogColours.Active(0);
GridLogHMC.Active(1);
for (int i = 0; i < logstreams.size(); i++) {
if (logstreams[i] == std::string("Error")) GridLogError.Active(1);
@ -87,7 +89,8 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1);
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1);
if (logstreams[i] == std::string("Integrator")) GridLogIntegrator.Active(1);
if (logstreams[i] == std::string("NoIntegrator")) GridLogIntegrator.Active(0);
if (logstreams[i] == std::string("NoHMC")) GridLogHMC.Active(0);
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);
}
}
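
A sketch of configuring the log streams with the new toggles; only stream names visible in this diff are used and the surrounding start-up code is assumed:

std::vector<std::string> logstreams = {"Error", "Performance", "NoIntegrator", "NoHMC"};
GridLogConfigure(logstreams);   // GridLogIntegrator/GridLogHMC default to on; switched off here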

View File

@ -182,6 +182,7 @@ extern GridLogger GridLogDebug ;
extern GridLogger GridLogPerformance;
extern GridLogger GridLogIterative ;
extern GridLogger GridLogIntegrator ;
extern GridLogger GridLogHMC;
extern Colours GridLogColours;
std::string demangle(const char* name) ;

View File

@ -39,9 +39,11 @@ using namespace Grid;
////////////////////////////////////////////////////////////////////////////////
class NerscIO : public BinaryIO {
public:
typedef Lattice<vLorentzColourMatrixD> GaugeField;
// Enable/disable exiting if the plaquette in the header does not match the value computed (default true)
static bool & exitOnReadPlaquetteMismatch(){ static bool v=true; return v; }
static inline void truncate(std::string file){
std::ofstream fout(file,std::ios::out);
}
@ -198,7 +200,7 @@ public:
std::cerr << " nersc_csum " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl;
exit(0);
}
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
if(exitOnReadPlaquetteMismatch()) assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
assert(nersc_csum == header.checksum );
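
A sketch of reading a legacy G-parity configuration with the new switch, assuming a gauge field 'Umu' and the usual NerscIO reader; the file name is a placeholder:

NerscIO::exitOnReadPlaquetteMismatch() = false;   // tolerate the factor-of-2 plaquette error in old configs
FieldMetaData header;
NerscIO::readConfiguration(Umu, header, "ckpoint_lat.1000");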

View File

@ -63,6 +63,7 @@ static constexpr int Ngp=2; // gparity index range
#define ColourIndex (2)
#define SpinIndex (1)
#define LorentzIndex (0)
#define GparityFlavourIndex (0)
// Also should make these a named enum type
static constexpr int DaggerNo=0;
@ -87,6 +88,8 @@ template<typename T> struct isCoarsened {
template <typename T> using IfCoarsened = Invoke<std::enable_if< isCoarsened<T>::value,int> > ;
template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ;
const int GparityFlavourTensorIndex = 3; //TensorLevel counts from the bottom!
// ChrisK very keen to add extra space for Gparity doubling.
//
// Also add domain wall index, in a way where Wilson operator
@ -110,8 +113,10 @@ template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVec
template<typename vtype> using iSpinColourSpinColourMatrix = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >;
template<typename vtype> using iGparityFlavourVector = iVector<iScalar<iScalar<vtype> >, Ngp>;
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >;
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
template<typename vtype> using iGparityFlavourMatrix = iMatrix<iScalar<iScalar<vtype> >, Ngp>;
// Spin matrix
typedef iSpinMatrix<Complex > SpinMatrix;
@ -176,6 +181,16 @@ typedef iDoubleStoredColourMatrix<vComplex > vDoubleStoredColourMatrix;
typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF;
typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD;
//G-parity flavour matrix
typedef iGparityFlavourMatrix<Complex> GparityFlavourMatrix;
typedef iGparityFlavourMatrix<ComplexF> GparityFlavourMatrixF;
typedef iGparityFlavourMatrix<ComplexD> GparityFlavourMatrixD;
typedef iGparityFlavourMatrix<vComplex> vGparityFlavourMatrix;
typedef iGparityFlavourMatrix<vComplexF> vGparityFlavourMatrixF;
typedef iGparityFlavourMatrix<vComplexD> vGparityFlavourMatrixD;
// Spin vector
typedef iSpinVector<Complex > SpinVector;
typedef iSpinVector<ComplexF> SpinVectorF;
@ -221,6 +236,16 @@ typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector;
typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF;
typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
//G-parity flavour vector
typedef iGparityFlavourVector<Complex > GparityFlavourVector;
typedef iGparityFlavourVector<ComplexF> GparityFlavourVectorF;
typedef iGparityFlavourVector<ComplexD> GparityFlavourVectorD;
typedef iGparityFlavourVector<vComplex > vGparityFlavourVector;
typedef iGparityFlavourVector<vComplexF> vGparityFlavourVectorF;
typedef iGparityFlavourVector<vComplexD> vGparityFlavourVectorD;
// singlets
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<ComplexF> TComplexF; // FIXME This is painful. Tensor singlet complex type.

View File

@ -40,6 +40,29 @@ class Action
public:
bool is_smeared = false;
RealD deriv_norm_sum;
RealD deriv_max_sum;
int deriv_num;
RealD deriv_us;
RealD S_us;
RealD refresh_us;
void reset_timer(void) {
deriv_us = S_us = refresh_us = 0.0;
deriv_num=0;
deriv_norm_sum = deriv_max_sum=0.0;
}
void deriv_log(RealD nrm, RealD max) { deriv_max_sum+=max; deriv_norm_sum+=nrm; deriv_num++;}
RealD deriv_max_average(void) { return deriv_max_sum/deriv_num; };
RealD deriv_norm_average(void) { return deriv_norm_sum/deriv_num; };
RealD deriv_timer(void) { return deriv_us; };
RealD S_timer(void) { return S_us; };
RealD refresh_timer(void) { return refresh_us; };
void deriv_timer_start(void) { deriv_us-=usecond(); }
void deriv_timer_stop(void) { deriv_us+=usecond(); }
void refresh_timer_start(void) { refresh_us-=usecond(); }
void refresh_timer_stop(void) { refresh_us+=usecond(); }
void S_timer_start(void) { S_us-=usecond(); }
void S_timer_stop(void) { S_us+=usecond(); }
// Heatbath?
virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) = 0; // refresh pseudofermions
virtual RealD S(const GaugeField& U) = 0; // evaluate the action
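
A sketch of how the new per-action instrumentation might be driven from an integrator loop; 'action', 'U' and 'force' stand for the integrator's own variables, and the second argument of deriv_log is a placeholder for a max-per-site norm:

action.deriv_timer_start();
action.deriv(U, force);                                   // existing force evaluation
action.deriv_timer_stop();
RealD Fnorm = norm2(force);
action.deriv_log(Fnorm, Fnorm /* placeholder max norm */);
std::cout << GridLogMessage << "<|dSdU|> " << action.deriv_norm_average()
          << " deriv time " << action.deriv_timer()/1.0e6 << " s" << std::endl;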

View File

@ -37,6 +37,10 @@ NAMESPACE_CHECK(ActionSet);
#include <Grid/qcd/action/ActionParams.h>
NAMESPACE_CHECK(ActionParams);
#include <Grid/qcd/action/filters/MomentumFilter.h>
#include <Grid/qcd/action/filters/DirichletFilter.h>
#include <Grid/qcd/action/filters/DDHMCFilter.h>
////////////////////////////////////////////
// Gauge Actions
////////////////////////////////////////////

View File

@ -63,6 +63,7 @@ struct StaggeredImplParams {
RealD, hi,
int, MaxIter,
RealD, tolerance,
RealD, mdtolerance,
int, degree,
int, precision,
int, BoundsCheckFreq);
@ -76,11 +77,13 @@ struct StaggeredImplParams {
RealD tol = 1.0e-8,
int _degree = 10,
int _precision = 64,
int _BoundsCheckFreq=20)
int _BoundsCheckFreq=20,
RealD mdtol = 1.0e-6)
: lo(_lo),
hi(_hi),
MaxIter(_maxit),
tolerance(tol),
mdtolerance(mdtol),
degree(_degree),
precision(_precision),
BoundsCheckFreq(_BoundsCheckFreq){};

View File

@ -68,7 +68,7 @@ public:
///////////////////////////////////////////////////////////////
// Support for MADWF tricks
///////////////////////////////////////////////////////////////
RealD Mass(void) { return mass; };
virtual RealD Mass(void) { return mass; };
void SetMass(RealD _mass) {
mass=_mass;
SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs

View File

@ -49,6 +49,8 @@ public:
virtual FermionField &tmp(void) = 0;
virtual void DirichletBlock(Coordinate & _Block) { assert(0); };
GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know
GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); };

View File

@ -30,6 +30,18 @@ directory
NAMESPACE_BEGIN(Grid);
/*
Policy implementation for G-parity boundary conditions
Rather than treating the gauge field as a flavored field, the Grid implementation of G-parity treats the gauge field as a regular
field with complex conjugate boundary conditions. In order to ensure the second flavor interacts with the conjugate links and the first
with the regular links we overload the functionality of doubleStore, whose purpose is to store the gauge field and the barrel-shifted gauge field
to avoid communicating links when applying the Dirac operator, such that the double-stored field also contains a flavor index which maps to
either the link or the conjugate link. This flavored field is then used by multLink to apply the correct link to a spinor.
Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.
mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
*/
template <class S, class Representation = FundamentalRepresentation, class Options=CoeffReal>
class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Representation::Dimension> > {
public:
@ -113,7 +125,7 @@ public:
|| ((distance== 1)&&(icoor[direction]==1))
|| ((distance==-1)&&(icoor[direction]==0));
permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu]; //only if we are going around the world
permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu] && mmu < Nd-1; //only if we are going around the world in a spatial direction
//Apply the links
int f_upper = permute_lane ? 1 : 0;
@ -139,10 +151,10 @@ public:
assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code
assert((sl == 1) || (sl == 2));
if ( SE->_around_the_world && St.parameters.twists[mmu] ) {
//If this site is a global boundary site, perform the G-parity flavor twist
if ( mmu < Nd-1 && SE->_around_the_world && St.parameters.twists[mmu] ) {
if ( sl == 2 ) {
//Only do the twist for lanes on the edge of the physical node
ExtractBuffer<sobj> vals(Nsimd);
extract(chi,vals);
@ -197,6 +209,19 @@ public:
reg = memory;
}
//Poke 'poke_f0' onto flavor 0 and 'poke_f1' onto flavor 1 in direction mu of the doubled gauge field Uds
inline void pokeGparityDoubledGaugeField(DoubledGaugeField &Uds, const GaugeLinkField &poke_f0, const GaugeLinkField &poke_f1, const int mu){
autoView(poke_f0_v, poke_f0, CpuRead);
autoView(poke_f1_v, poke_f1, CpuRead);
autoView(Uds_v, Uds, CpuWrite);
thread_foreach(ss,poke_f0_v,{
Uds_v[ss](0)(mu) = poke_f0_v[ss]();
Uds_v[ss](1)(mu) = poke_f1_v[ss]();
});
}
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
{
conformable(Uds.Grid(),GaugeGrid);
@ -208,13 +233,18 @@ public:
Lattice<iScalar<vInteger> > coor(GaugeGrid);
for(int mu=0;mu<Nd;mu++){
//Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.
//mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
for(int mu=0;mu<Nd-1;mu++){
if( Params.twists[mu] ){
LatticeCoordinate(coor,mu);
}
U = PeekIndex<LorentzIndex>(Umu,mu);
Uconj = conjugate(U);
// Implement the isospin rotation sign on the boundary between f=1 and f=0
// This phase could come from a simple bc 1,1,-1,1 ..
int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
if ( Params.twists[mu] ) {
@ -260,6 +290,38 @@ public:
});
}
}
{ //periodic / antiperiodic temporal BCs
int mu = Nd-1;
int L = GaugeGrid->GlobalDimensions()[mu];
int Lmu = L - 1;
LatticeCoordinate(coor, mu);
U = PeekIndex<LorentzIndex>(Umu, mu); //Get t-directed links
GaugeLinkField *Upoke = &U;
if(Params.twists[mu]){ //antiperiodic
Utmp = where(coor == Lmu, -U, U);
Upoke = &Utmp;
}
Uconj = conjugate(*Upoke); //second flavor interacts with conjugate links
pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu);
//Get the barrel-shifted field
Utmp = adj(Cshift(U, mu, -1)); //is a forward shift!
Upoke = &Utmp;
if(Params.twists[mu]){
U = where(coor == 0, -Utmp, Utmp); //boundary phase
Upoke = &U;
}
Uconj = conjugate(*Upoke);
pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu + 4);
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) {
@ -300,27 +362,47 @@ public:
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
int Ls=Btilde.Grid()->_fdimensions[0];
int Ls = Btilde.Grid()->_fdimensions[0];
GaugeLinkField tmp(mat.Grid());
tmp = Zero();
{
autoView( tmp_v , tmp, CpuWrite);
autoView( Atilde_v , Atilde, CpuRead);
autoView( Btilde_v , Btilde, CpuRead);
thread_for(ss,tmp.Grid()->oSites(),{
for (int s = 0; s < Ls; s++) {
int sF = s + Ls * ss;
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde_v[sF], Atilde_v[sF]));
tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
GridBase *GaugeGrid = mat.Grid();
Lattice<iScalar<vInteger> > coor(GaugeGrid);
if( Params.twists[mu] ){
LatticeCoordinate(coor,mu);
}
autoView( mat_v , mat, AcceleratorWrite);
autoView( Btilde_v , Btilde, AcceleratorRead);
autoView( Atilde_v , Atilde, AcceleratorRead);
accelerator_for(sss,mat.Grid()->oSites(), FermionField::vector_type::Nsimd(),{
int sU=sss;
typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType;
ColorMatrixType sum;
zeroit(sum);
for(int s=0;s<Ls;s++){
int sF = s+Ls*sU;
for(int spn=0;spn<Ns;spn++){ //sum over spin
//Flavor 0
auto bb = coalescedRead(Btilde_v[sF](0)(spn) ); //color vector
auto aa = coalescedRead(Atilde_v[sF](0)(spn) );
sum = sum + outerProduct(bb,aa);
//Flavor 1
bb = coalescedRead(Btilde_v[sF](1)(spn) );
aa = coalescedRead(Atilde_v[sF](1)(spn) );
sum = sum + conjugate(outerProduct(bb,aa));
}
}
coalescedWrite(mat_v[sU](mu)(), sum);
});
}
PokeIndex<LorentzIndex>(mat, tmp, mu);
return;
}
};
typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffReal> GparityWilsonImplR; // Real.. whichever prec
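
An illustrative boundary-condition setup under the new convention; a sketch only, assuming GparityWilsonImplParams (the ImplParams of the class above) and the usual operator/grid construction around it:

GparityWilsonImplParams params;
params.twists = Coordinate({1, 1, 1, 1});   // twists[0..2]=1: G-parity in x,y,z; twists[Nd-1]=1: antiperiodic time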

View File

@ -75,6 +75,10 @@ public:
FermionField _tmp;
FermionField &tmp(void) { return _tmp; }
int Dirichlet;
Coordinate Block;
/********** Deprecate timers **********/
void Report(void);
void ZeroCounters(void);
double DhopCalls;
@ -174,6 +178,17 @@ public:
GridRedBlackCartesian &FourDimRedBlackGrid,
double _M5,const ImplParams &p= ImplParams());
virtual void DirichletBlock(Coordinate & block)
{
assert(block.size()==Nd+1);
if ( block[0] || block[1] || block[2] || block[3] || block[4] ){
Dirichlet = 1;
Block = block;
Stencil.DirichletBlock(block);
StencilEven.DirichletBlock(block);
StencilOdd.DirichletBlock(block);
}
}
// Constructors
/*
WilsonFermion5D(int simd,

View File

@ -60,7 +60,8 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
UmuOdd (_FourDimRedBlackGrid),
Lebesgue(_FourDimGrid),
LebesgueEvenOdd(_FourDimRedBlackGrid),
_tmp(&FiveDimRedBlackGrid)
_tmp(&FiveDimRedBlackGrid),
Dirichlet(0)
{
// some assertions
assert(FiveDimGrid._ndimension==5);
@ -218,6 +219,14 @@ void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
{
GaugeField HUmu(_Umu.Grid());
HUmu = _Umu*(-0.5);
if ( Dirichlet ) {
std::cout << GridLogMessage << " Dirichlet BCs 5d " <<Block<<std::endl;
Coordinate GaugeBlock(Nd);
for(int d=0;d<Nd;d++) GaugeBlock[d] = Block[d+1];
std::cout << GridLogMessage << " Dirichlet BCs 4d " <<GaugeBlock<<std::endl;
DirichletFilter<GaugeField> Filter(GaugeBlock);
Filter.applyFilter(HUmu);
}
Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
pickCheckerboard(Even,UmuEven,Umu);
pickCheckerboard(Odd ,UmuOdd,Umu);
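
A sketch of enabling Dirichlet boundaries on an already-constructed 5d operator 'Ddwf' (a placeholder); the block size is illustrative and entry 0 is the s-direction:

Coordinate Block({0, 16, 16, 16, 16});   // Nd+1 entries; a zero entry leaves that direction untouched
Ddwf.DirichletBlock(Block);              // marks the operator Dirichlet and restricts its stencils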

View File

@ -0,0 +1,102 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/hmc/integrators/DirichletFilter.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
//--------------------------------------------------------------------
#pragma once
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////
// DDHMC filter with sub-block size B[mu]
////////////////////////////////////////////////////
template<typename GaugeField>
struct DDHMCFilter: public MomentumFilterBase<GaugeField>
{
Coordinate Block;
int Width;
DDHMCFilter(const Coordinate &_Block,int _Width=2): Block(_Block) { Width=_Width; }
void applyFilter(GaugeField &U) const override
{
GridBase *grid = U.Grid();
Coordinate Global=grid->GlobalDimensions();
GaugeField zzz(grid); zzz = Zero();
LatticeInteger coor(grid);
auto zzz_mu = PeekIndex<LorentzIndex>(zzz,0);
////////////////////////////////////////////////////
// Zero BDY layers
////////////////////////////////////////////////////
std::cout<<GridLogMessage<<" DDHMC Force Filter Block "<<Block<<" width " <<Width<<std::endl;
for(int mu=0;mu<Nd;mu++) {
Integer B1 = Block[mu];
if ( B1 && (B1 <= Global[mu]) ) {
LatticeCoordinate(coor,mu);
////////////////////////////////
// OmegaBar - zero all links contained in slice B-1,0 and
// mu links connecting to Omega
////////////////////////////////
if ( Width==1) {
U = where(mod(coor,B1)==Integer(B1-1),zzz,U);
U = where(mod(coor,B1)==Integer(0) ,zzz,U);
auto U_mu = PeekIndex<LorentzIndex>(U,mu);
U_mu = where(mod(coor,B1)==Integer(B1-2),zzz_mu,U_mu);
PokeIndex<LorentzIndex>(U, U_mu, mu);
}
if ( Width==2) {
U = where(mod(coor,B1)==Integer(B1-2),zzz,U);
U = where(mod(coor,B1)==Integer(B1-1),zzz,U);
U = where(mod(coor,B1)==Integer(0) ,zzz,U);
U = where(mod(coor,B1)==Integer(1) ,zzz,U);
auto U_mu = PeekIndex<LorentzIndex>(U,mu);
U_mu = where(mod(coor,B1)==Integer(B1-3),zzz_mu,U_mu);
PokeIndex<LorentzIndex>(U, U_mu, mu);
}
if ( Width==3) {
U = where(mod(coor,B1)==Integer(B1-3),zzz,U);
U = where(mod(coor,B1)==Integer(B1-2),zzz,U);
U = where(mod(coor,B1)==Integer(B1-1),zzz,U);
U = where(mod(coor,B1)==Integer(0) ,zzz,U);
U = where(mod(coor,B1)==Integer(1) ,zzz,U);
U = where(mod(coor,B1)==Integer(2) ,zzz,U);
auto U_mu = PeekIndex<LorentzIndex>(U,mu);
U_mu = where(mod(coor,B1)==Integer(B1-4),zzz_mu,U_mu);
PokeIndex<LorentzIndex>(U, U_mu, mu);
}
}
}
}
};
NAMESPACE_END(Grid);
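
Since the three Width branches above differ only in which slices they touch, here is a standalone sketch (plain C++, no Grid dependency; the block size B = 8 is an arbitrary illustration) that prints the zeroing pattern implied by the where(mod(coor,B1)==...) clauses: every link is zeroed on the W outermost slices on each side of a domain wall, and the mu-direction link one slice further in is zeroed as well because it connects into Omega.

#include <cstdio>

int main() {
  const int B = 8;                  // hypothetical sub-block size Block[mu]
  for (int W = 1; W <= 3; W++) {    // the widths handled by DDHMCFilter
    std::printf("Width %d:\n", W);
    for (int c = 0; c < B; c++) {
      // Slices where every link is zeroed: {B-W,...,B-1} and {0,...,W-1}
      bool zero_all = (c >= B - W) || (c < W);
      // The mu-direction link is additionally zeroed on slice B-W-1
      bool zero_mu  = zero_all || (c == B - W - 1);
      std::printf("  coor%%B = %d : all links %s, mu link %s\n",
                  c, zero_all ? "zeroed" : "kept", zero_mu ? "zeroed" : "kept");
    }
  }
  return 0;
}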

View File

@ -0,0 +1,71 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/hmc/integrators/DirichletFilter.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
//--------------------------------------------------------------------
#pragma once
NAMESPACE_BEGIN(Grid);
template<typename MomentaField>
struct DirichletFilter: public MomentumFilterBase<MomentaField>
{
typedef typename MomentaField::vector_type vector_type; //SIMD-vectorized complex type
typedef typename MomentaField::scalar_type scalar_type; //scalar complex type
typedef iScalar<iScalar<iScalar<vector_type> > > ScalarType; //complex phase for each site
Coordinate Block;
DirichletFilter(const Coordinate &_Block): Block(_Block){}
void applyFilter(MomentaField &P) const override
{
GridBase *grid = P.Grid();
typedef decltype(PeekIndex<LorentzIndex>(P, 0)) LatCM;
////////////////////////////////////////////////////
// Zero only the links that cross between domains
////////////////////////////////////////////////////
LatticeInteger coor(grid);
LatCM zz(grid); zz = Zero();
for(int mu=0;mu<Nd;mu++) {
if ( (Block[mu]) && (Block[mu] < grid->GlobalDimensions()[mu] ) ) {
// If this proves costly, the Grid could be provided earlier and the masks precomputed
std::cout << " Dirichlet in mu="<<mu<<std::endl;
LatticeCoordinate(coor,mu);
auto P_mu = PeekIndex<LorentzIndex>(P, mu);
P_mu = where(mod(coor,Block[mu])==Integer(Block[mu]-1),zz,P_mu);
PokeIndex<LorentzIndex>(P, P_mu, mu);
}
}
}
};
NAMESPACE_END(Grid);
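
For completeness, a minimal driver applying this filter to a gauge configuration, following the same call pattern as the WilsonFermion5D::ImportGauge hunk above (DirichletFilter<GaugeField> Filter(Block); Filter.applyFilter(U)). The lattice setup is the usual Grid test-program boilerplate and the 8^4 block is an arbitrary example; directions with Block[mu] == 0 are skipped by the filter.

#include <Grid/Grid.h>
using namespace Grid;

int main(int argc, char **argv) {
  Grid_init(&argc, &argv);

  // Standard 4d grid and a random gauge field
  GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
                                                        GridDefaultSimd(Nd, vComplexD::Nsimd()),
                                                        GridDefaultMpi());
  GridParallelRNG pRNG(UGrid);
  pRNG.SeedFixedIntegers(std::vector<int>({1, 2, 3, 4}));
  LatticeGaugeFieldD U(UGrid);
  SU<Nc>::HotConfiguration(pRNG, U);

  // Zero the links crossing 8^4 domain walls (illustrative block choice)
  Coordinate Block(Nd);
  for (int mu = 0; mu < Nd; mu++) Block[mu] = 8;
  DirichletFilter<LatticeGaugeFieldD> Filter(Block);
  Filter.applyFilter(U);

  Grid_finalize();
  return 0;
}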

View File

@ -37,7 +37,7 @@ NAMESPACE_BEGIN(Grid);
template<typename MomentaField>
struct MomentumFilterBase{
virtual void applyFilter(MomentaField &P) const;
virtual void applyFilter(MomentaField &P) const = 0;
};
//Do nothing

View File

@ -14,6 +14,31 @@ NAMESPACE_BEGIN(Grid);
assert( (lambda_max < hi) && " High Bounds Check on operator failed" );
}
template<class Field> void ChebyBoundsCheck(LinearOperatorBase<Field> &HermOp,
Field &GaussNoise,
RealD lo,RealD hi)
{
int orderfilter = 1000;
Chebyshev<Field> Cheb(lo,hi,orderfilter);
GridBase *FermionGrid = GaussNoise.Grid();
Field X(FermionGrid);
Field Z(FermionGrid);
X=GaussNoise;
RealD Nx = norm2(X);
Cheb(HermOp,X,Z);
RealD Nz = norm2(Z);
std::cout << "************************* "<<std::endl;
std::cout << " noise = "<<Nx<<std::endl;
std::cout << " Cheb x noise = "<<Nz<<std::endl;
std::cout << " Ratio = "<<Nz/Nx<<std::endl;
std::cout << "************************* "<<std::endl;
assert( ((Nz/Nx)<1.0) && " ChebyBoundsCheck ");
}
template<class Field> void InverseSqrtBoundsCheck(int MaxIter,double tol,
LinearOperatorBase<Field> &HermOp,
Field &GaussNoise,
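
One way to read the new ChebyBoundsCheck above (a sketch of the reasoning, on the assumption that the order-1000 Chebyshev used here is normalised to be of order one on [lo,hi]): expand the Gaussian noise in the eigenbasis of HermOp,

\[
X=\sum_k c_k v_k,\qquad Z=p(H)\,X=\sum_k c_k\,p(\lambda_k)\,v_k
\;\;\Longrightarrow\;\;
\frac{N_Z}{N_X}=\frac{\sum_k |c_k|^2\,p(\lambda_k)^2}{\sum_k |c_k|^2}.
\]

Chebyshev polynomials are bounded on the approximation interval but grow roughly exponentially with the order outside it, so the ratio stays below one only if the spectral weight of the noise lies inside [lo,hi]; a single eigenvalue outside the bounds makes N_Z/N_X explode and the assert fires.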

View File

@ -0,0 +1,163 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/pseudofermion/DomainDecomposedTwoFlavourBoundaryBoson.h
Copyright (C) 2021
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
///////////////////////////////////////
// Two flavour ratio
///////////////////////////////////////
template<class ImplD,class ImplF>
class DomainDecomposedBoundaryTwoFlavourBosonPseudoFermion : public Action<typename ImplD::GaugeField> {
public:
INHERIT_IMPL_TYPES(ImplD);
private:
SchurFactoredFermionOperator<ImplD,ImplF> & NumOp;// the basic operator
RealD InnerStoppingCondition;
RealD ActionStoppingCondition;
RealD DerivativeStoppingCondition;
FermionField Phi; // the pseudo fermion field for this trajectory
public:
DomainDecomposedBoundaryTwoFlavourBosonPseudoFermion(SchurFactoredFermionOperator<ImplD,ImplF> &_NumOp,RealD _DerivativeTol, RealD _ActionTol, RealD _InnerTol=1.0e-6)
: NumOp(_NumOp),
DerivativeStoppingCondition(_DerivativeTol),
ActionStoppingCondition(_ActionTol),
InnerStoppingCondition(_InnerTol),
Phi(_NumOp.FermionGrid()) {};
virtual std::string action_name(){return "DomainDecomposedBoundaryTwoFlavourBosonPseudoFermion";}
virtual std::string LogParameters(){
std::stringstream sstream;
return sstream.str();
}
virtual void refresh(const GaugeField &U, GridSerialRNG& sRNG, GridParallelRNG& pRNG)
{
// P(phi) = e^{- phi^dag P^dag P phi}
//
// NumOp == P
//
// Take phi = P^{-1} eta ; eta = P Phi
//
// P(eta) = e^{- eta^dag eta}
//
// Compare with e^{-x^2/(2 sig^2)} : sig^2 = 0.5.
//
// So eta should have width sig = 1/sqrt(2), i.e. scale a unit Gaussian by 0.707...
//
RealD scale = std::sqrt(0.5);
NumOp.tolinner=InnerStoppingCondition;
NumOp.tol=ActionStoppingCondition;
NumOp.ImportGauge(U);
FermionField eta(NumOp.FermionGrid());
gaussian(pRNG,eta); eta=eta*scale;
NumOp.ProjectBoundaryBar(eta);
//DumpSliceNorm("eta",eta);
NumOp.RInv(eta,Phi);
//DumpSliceNorm("Phi",Phi);
};
//////////////////////////////////////////////////////
// S = phi^dag Pdag P phi
//////////////////////////////////////////////////////
virtual RealD S(const GaugeField &U) {
NumOp.tolinner=InnerStoppingCondition;
NumOp.tol=ActionStoppingCondition;
NumOp.ImportGauge(U);
FermionField Y(NumOp.FermionGrid());
NumOp.R(Phi,Y);
RealD action = norm2(Y);
return action;
};
virtual void deriv(const GaugeField &U,GaugeField & dSdU)
{
NumOp.tolinner=InnerStoppingCondition;
NumOp.tol=DerivativeStoppingCondition;
NumOp.ImportGauge(U);
GridBase *fgrid = NumOp.FermionGrid();
GridBase *ugrid = NumOp.GaugeGrid();
FermionField X(fgrid);
FermionField Y(fgrid);
FermionField tmp(fgrid);
GaugeField force(ugrid);
FermionField DobiDdbPhi(fgrid); // Vector A in my notes
FermionField DoiDdDobiDdbPhi(fgrid); // Vector B in my notes
FermionField DoidP_Phi(fgrid); // Vector E in my notes
FermionField DobidDddDoidP_Phi(fgrid); // Vector F in my notes
FermionField P_Phi(fgrid);
// P term
NumOp.dBoundaryBar(Phi,tmp);
NumOp.dOmegaBarInv(tmp,DobiDdbPhi); // Vector A
NumOp.dBoundary(DobiDdbPhi,tmp);
NumOp.dOmegaInv(tmp,DoiDdDobiDdbPhi); // Vector B
P_Phi = Phi - DoiDdDobiDdbPhi;
NumOp.ProjectBoundaryBar(P_Phi);
// P^dag P term
NumOp.dOmegaDagInv(P_Phi,DoidP_Phi); // Vector E
NumOp.dBoundaryDag(DoidP_Phi,tmp);
NumOp.dOmegaBarDagInv(tmp,DobidDddDoidP_Phi); // Vector F
NumOp.dBoundaryBarDag(DobidDddDoidP_Phi,tmp);
X = DobiDdbPhi;
Y = DobidDddDoidP_Phi;
NumOp.DirichletFermOpD.MDeriv(force,Y,X,DaggerNo); dSdU=force;
NumOp.DirichletFermOpD.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU+force;
X = DoiDdDobiDdbPhi;
Y = DoidP_Phi;
NumOp.DirichletFermOpD.MDeriv(force,Y,X,DaggerNo); dSdU=dSdU+force;
NumOp.DirichletFermOpD.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU+force;
dSdU *= -1.0;
};
};
NAMESPACE_END(Grid);
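
In equation form (this just restates the refresh()/S() comments above, with P the Schur-factored boundary operator NumOp):

\[
S[\phi]=\phi^\dagger P^\dagger P\,\phi,\qquad
\int d\phi^\dagger d\phi\; e^{-S[\phi]} \;\propto\; \det\!\big(P^\dagger P\big)^{-1},
\]

so the term contributes an inverse determinant, i.e. a bosonic factor. For the heatbath, \eta is drawn from e^{-\eta^\dagger\eta} and \phi=P^{-1}\eta gives S[\phi]=\eta^\dagger\eta exactly; per real component e^{-\eta^\dagger\eta} is a Gaussian of width \sigma=1/\sqrt{2}, which is the scale = sqrt(0.5) factor in the code.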

View File

@ -0,0 +1,158 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/pseudofermion/DomainDecomposedTwoFlavourBoundary.h
Copyright (C) 2021
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
///////////////////////////////////////
// Two flavour ratio
///////////////////////////////////////
template<class ImplD,class ImplF>
class DomainDecomposedBoundaryTwoFlavourPseudoFermion : public Action<typename ImplD::GaugeField> {
public:
INHERIT_IMPL_TYPES(ImplD);
private:
SchurFactoredFermionOperator<ImplD,ImplF> & DenOp;// the basic operator
RealD ActionStoppingCondition;
RealD DerivativeStoppingCondition;
RealD InnerStoppingCondition;
FermionField Phi; // the pseudo fermion field for this trajectory
RealD refresh_action;
public:
DomainDecomposedBoundaryTwoFlavourPseudoFermion(SchurFactoredFermionOperator<ImplD,ImplF> &_DenOp,RealD _DerivativeTol, RealD _ActionTol, RealD _InnerTol = 1.0e-6 )
: DenOp(_DenOp),
DerivativeStoppingCondition(_DerivativeTol),
ActionStoppingCondition(_ActionTol),
InnerStoppingCondition(_InnerTol),
Phi(_DenOp.FermionGrid()) {};
virtual std::string action_name(){return "DomainDecomposedBoundaryTwoFlavourPseudoFermion";}
virtual std::string LogParameters(){
std::stringstream sstream;
return sstream.str();
}
virtual void refresh(const GaugeField &U, GridSerialRNG& sRNG, GridParallelRNG& pRNG)
{
// P(phi) = e^{- phi^dag Rdag^-1 R^-1 phi}
//
// DenOp == R
//
// Take phi = R eta ; eta = R^-1 Phi
//
// P(eta) = e^{- eta^dag eta}
//
// Compare with e^{-x^2/(2 sig^2)} : sig^2 = 0.5.
//
// So eta should have width sig = 1/sqrt(2), i.e. scale a unit Gaussian by 0.707...
//
RealD scale = std::sqrt(0.5);
DenOp.tolinner=InnerStoppingCondition;
DenOp.tol =ActionStoppingCondition;
DenOp.ImportGauge(U);
FermionField eta(DenOp.FermionGrid());
gaussian(pRNG,eta); eta=eta*scale;
DenOp.ProjectBoundaryBar(eta);
DenOp.R(eta,Phi);
//DumpSliceNorm("Phi",Phi);
refresh_action = norm2(eta);
};
//////////////////////////////////////////////////////
// S = phi^dag Rdag^-1 R^-1 phi
//////////////////////////////////////////////////////
virtual RealD S(const GaugeField &U) {
DenOp.tolinner=InnerStoppingCondition;
DenOp.tol=ActionStoppingCondition;
DenOp.ImportGauge(U);
FermionField X(DenOp.FermionGrid());
DenOp.RInv(Phi,X);
RealD action = norm2(X);
return action;
};
virtual void deriv(const GaugeField &U,GaugeField & dSdU)
{
DenOp.tolinner=InnerStoppingCondition;
DenOp.tol=DerivativeStoppingCondition;
DenOp.ImportGauge(U);
GridBase *fgrid = DenOp.FermionGrid();
GridBase *ugrid = DenOp.GaugeGrid();
FermionField X(fgrid);
FermionField Y(fgrid);
FermionField tmp(fgrid);
GaugeField force(ugrid);
FermionField DiDdb_Phi(fgrid); // Vector C in my notes
FermionField DidRinv_Phi(fgrid); // Vector D in my notes
FermionField Rinv_Phi(fgrid);
// FermionField RinvDagRinv_Phi(fgrid);
// FermionField DdbdDidRinv_Phi(fgrid);
// R^-1 term
DenOp.dBoundaryBar(Phi,tmp);
DenOp.Dinverse(tmp,DiDdb_Phi); // Vector C
Rinv_Phi = Phi - DiDdb_Phi;
DenOp.ProjectBoundaryBar(Rinv_Phi);
// R^-dagger R^-1 term
DenOp.DinverseDag(Rinv_Phi,DidRinv_Phi); // Vector D
/*
DenOp.dBoundaryBarDag(DidRinv_Phi,DdbdDidRinv_Phi);
RinvDagRinv_Phi = Rinv_Phi - DdbdDidRinv_Phi;
DenOp.ProjectBoundaryBar(RinvDagRinv_Phi);
*/
X = DiDdb_Phi;
Y = DidRinv_Phi;
DenOp.PeriodicFermOpD.MDeriv(force,Y,X,DaggerNo); dSdU=force;
DenOp.PeriodicFermOpD.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU+force;
DumpSliceNorm("force",dSdU);
dSdU *= -1.0;
};
};
NAMESPACE_END(Grid);

View File

@ -0,0 +1,237 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/pseudofermion/DomainDecomposedTwoFlavourBoundary.h
Copyright (C) 2021
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
///////////////////////////////////////
// Two flavour ratio
///////////////////////////////////////
template<class ImplD,class ImplF>
class DomainDecomposedBoundaryTwoFlavourRatioPseudoFermion : public Action<typename ImplD::GaugeField> {
public:
INHERIT_IMPL_TYPES(ImplD);
private:
SchurFactoredFermionOperator<ImplD,ImplF> & NumOp;// the basic operator
SchurFactoredFermionOperator<ImplD,ImplF> & DenOp;// the basic operator
RealD InnerStoppingCondition;
RealD ActionStoppingCondition;
RealD DerivativeStoppingCondition;
FermionField Phi; // the pseudo fermion field for this trajectory
public:
DomainDecomposedBoundaryTwoFlavourRatioPseudoFermion(SchurFactoredFermionOperator<ImplD,ImplF> &_NumOp,
SchurFactoredFermionOperator<ImplD,ImplF> &_DenOp,
RealD _DerivativeTol, RealD _ActionTol, RealD _InnerTol=1.0e-6)
: NumOp(_NumOp), DenOp(_DenOp),
Phi(_NumOp.PeriodicFermOpD.FermionGrid()),
InnerStoppingCondition(_InnerTol),
DerivativeStoppingCondition(_DerivativeTol),
ActionStoppingCondition(_ActionTol)
{};
virtual std::string action_name(){return "DomainDecomposedBoundaryTwoFlavourRatioPseudoFermion";}
virtual std::string LogParameters(){
std::stringstream sstream;
return sstream.str();
}
virtual void refresh(const GaugeField &U, GridSerialRNG& sRNG, GridParallelRNG& pRNG)
{
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
FermionField eta(NumOp.PeriodicFermOpD.FermionGrid());
FermionField tmp(NumOp.PeriodicFermOpD.FermionGrid());
// P(phi) = e^{- phi^dag P^dag Rdag^-1 R^-1 P phi}
//
// NumOp == P
// DenOp == R
//
// Take phi = P^{-1} R eta ; eta = R^-1 P Phi
//
// P(eta) = e^{- eta^dag eta}
//
// Compare with e^{-x^2/(2 sig^2)} : sig^2 = 0.5.
//
// So eta should have width sig = 1/sqrt(2), i.e. scale a unit Gaussian by 0.707...
//
RealD scale = std::sqrt(0.5);
gaussian(pRNG,eta); eta=eta*scale;
NumOp.ProjectBoundaryBar(eta);
NumOp.tolinner=InnerStoppingCondition;
DenOp.tolinner=InnerStoppingCondition;
DenOp.tol = ActionStoppingCondition;
NumOp.tol = ActionStoppingCondition;
DenOp.R(eta,tmp);
NumOp.RInv(tmp,Phi);
DumpSliceNorm("Phi",Phi);
};
//////////////////////////////////////////////////////
// S = phi^dag Pdag Rdag^-1 R^-1 P phi
//////////////////////////////////////////////////////
virtual RealD S(const GaugeField &U) {
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
FermionField X(NumOp.PeriodicFermOpD.FermionGrid());
FermionField Y(NumOp.PeriodicFermOpD.FermionGrid());
NumOp.tolinner=InnerStoppingCondition;
DenOp.tolinner=InnerStoppingCondition;
DenOp.tol = ActionStoppingCondition;
NumOp.tol = ActionStoppingCondition;
NumOp.R(Phi,Y);
DenOp.RInv(Y,X);
RealD action = norm2(X);
// std::cout << " DD boundary action is " <<action<<std::endl;
return action;
};
virtual void deriv(const GaugeField &U,GaugeField & dSdU)
{
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
GridBase *fgrid = NumOp.PeriodicFermOpD.FermionGrid();
GridBase *ugrid = NumOp.PeriodicFermOpD.GaugeGrid();
FermionField X(fgrid);
FermionField Y(fgrid);
FermionField tmp(fgrid);
GaugeField force(ugrid);
FermionField DobiDdbPhi(fgrid); // Vector A in my notes
FermionField DoiDdDobiDdbPhi(fgrid); // Vector B in my notes
FermionField DiDdbP_Phi(fgrid); // Vector C in my notes
FermionField DidRinvP_Phi(fgrid); // Vector D in my notes
FermionField DdbdDidRinvP_Phi(fgrid);
FermionField DoidRinvDagRinvP_Phi(fgrid); // Vector E in my notes
FermionField DobidDddDoidRinvDagRinvP_Phi(fgrid); // Vector F in my notes
FermionField P_Phi(fgrid);
FermionField RinvP_Phi(fgrid);
FermionField RinvDagRinvP_Phi(fgrid);
FermionField PdagRinvDagRinvP_Phi(fgrid);
// RealD action = S(U);
NumOp.tolinner=InnerStoppingCondition;
DenOp.tolinner=InnerStoppingCondition;
DenOp.tol = DerivativeStoppingCondition;
NumOp.tol = DerivativeStoppingCondition;
// P term
NumOp.dBoundaryBar(Phi,tmp);
NumOp.dOmegaBarInv(tmp,DobiDdbPhi); // Vector A
NumOp.dBoundary(DobiDdbPhi,tmp);
NumOp.dOmegaInv(tmp,DoiDdDobiDdbPhi); // Vector B
P_Phi = Phi - DoiDdDobiDdbPhi;
NumOp.ProjectBoundaryBar(P_Phi);
// R^-1 P term
DenOp.dBoundaryBar(P_Phi,tmp);
DenOp.Dinverse(tmp,DiDdbP_Phi); // Vector C
RinvP_Phi = P_Phi - DiDdbP_Phi;
DenOp.ProjectBoundaryBar(RinvP_Phi); // Correct to here
// R^-dagger R^-1 P term
DenOp.DinverseDag(RinvP_Phi,DidRinvP_Phi); // Vector D
DenOp.dBoundaryBarDag(DidRinvP_Phi,DdbdDidRinvP_Phi);
RinvDagRinvP_Phi = RinvP_Phi - DdbdDidRinvP_Phi;
DenOp.ProjectBoundaryBar(RinvDagRinvP_Phi);
// P^dag R^-dagger R^-1 P term
NumOp.dOmegaDagInv(RinvDagRinvP_Phi,DoidRinvDagRinvP_Phi); // Vector E
NumOp.dBoundaryDag(DoidRinvDagRinvP_Phi,tmp);
NumOp.dOmegaBarDagInv(tmp,DobidDddDoidRinvDagRinvP_Phi); // Vector F
NumOp.dBoundaryBarDag(DobidDddDoidRinvDagRinvP_Phi,tmp);
PdagRinvDagRinvP_Phi = RinvDagRinvP_Phi- tmp;
NumOp.ProjectBoundaryBar(PdagRinvDagRinvP_Phi);
/*
std::cout << "S eval "<< action << std::endl;
std::cout << "S - IP1 "<< innerProduct(Phi,PdagRinvDagRinvP_Phi) << std::endl;
std::cout << "S - IP2 "<< norm2(RinvP_Phi) << std::endl;
NumOp.R(Phi,tmp);
tmp = tmp - P_Phi;
std::cout << "diff1 "<<norm2(tmp) <<std::endl;
DenOp.RInv(P_Phi,tmp);
tmp = tmp - RinvP_Phi;
std::cout << "diff2 "<<norm2(tmp) <<std::endl;
DenOp.RDagInv(RinvP_Phi,tmp);
tmp = tmp - RinvDagRinvP_Phi;
std::cout << "diff3 "<<norm2(tmp) <<std::endl;
DenOp.RDag(RinvDagRinvP_Phi,tmp);
tmp = tmp - PdagRinvDagRinvP_Phi;
std::cout << "diff4 "<<norm2(tmp) <<std::endl;
*/
dSdU=Zero();
X = DobiDdbPhi;
Y = DobidDddDoidRinvDagRinvP_Phi;
NumOp.DirichletFermOpD.MDeriv(force,Y,X,DaggerNo); dSdU=dSdU+force;
NumOp.DirichletFermOpD.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU+force;
X = DoiDdDobiDdbPhi;
Y = DoidRinvDagRinvP_Phi;
NumOp.DirichletFermOpD.MDeriv(force,Y,X,DaggerNo); dSdU=dSdU+force;
NumOp.DirichletFermOpD.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU+force;
X = DiDdbP_Phi;
Y = DidRinvP_Phi;
DenOp.PeriodicFermOpD.MDeriv(force,Y,X,DaggerNo); dSdU=dSdU+force;
DenOp.PeriodicFermOpD.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU+force;
dSdU *= -1.0;
};
};
NAMESPACE_END(Grid);
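
The corresponding equations for the ratio term (restating the refresh() comments, NumOp = P, DenOp = R):

\[
S[\phi]=\phi^\dagger P^\dagger (R^\dagger)^{-1} R^{-1} P\,\phi,\qquad
\int d\phi^\dagger d\phi\; e^{-S[\phi]} \;\propto\; \frac{|\det R|^{2}}{|\det P|^{2}},
\]

with the heatbath draw \phi = P^{-1} R\,\eta for Gaussian \eta, so that S[\phi]=\eta^\dagger\eta just as in the boson and two-flavour classes above.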

View File

@ -59,6 +59,7 @@ NAMESPACE_BEGIN(Grid);
FermionOperator<Impl> & DenOp;// the basic operator
FermionField PhiEven; // the pseudo fermion field for this trajectory
FermionField PhiOdd; // the pseudo fermion field for this trajectory
FermionField Noise; // spare noise field for bounds check
public:
@ -70,6 +71,7 @@ NAMESPACE_BEGIN(Grid);
DenOp(_DenOp),
PhiOdd (_NumOp.FermionRedBlackGrid()),
PhiEven(_NumOp.FermionRedBlackGrid()),
Noise(_NumOp.FermionRedBlackGrid()),
param(p)
{
AlgRemez remez(param.lo,param.hi,param.precision);
@ -87,7 +89,11 @@ NAMESPACE_BEGIN(Grid);
PowerNegQuarter.Init(remez,param.tolerance,true);
};
virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}
virtual std::string action_name(){
std::stringstream sstream;
sstream<< "OneFlavourEvenOddRatioRationalPseudoFermionAction det("<< DenOp.Mass() << ") / det("<<NumOp.Mass()<<")";
return sstream.str();
}
virtual std::string LogParameters(){
std::stringstream sstream;
@ -128,6 +134,7 @@ NAMESPACE_BEGIN(Grid);
pickCheckerboard(Even,etaEven,eta);
pickCheckerboard(Odd,etaOdd,eta);
Noise = etaOdd;
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
@ -175,9 +182,10 @@ NAMESPACE_BEGIN(Grid);
grid->Broadcast(0,r);
if ( (r%param.BoundsCheckFreq)==0 ) {
FermionField gauss(NumOp.FermionRedBlackGrid());
gauss = PhiOdd;
gauss = Noise;
HighBoundCheck(MdagM,gauss,param.hi);
InverseSqrtBoundsCheck(param.MaxIter,param.tolerance*100,MdagM,gauss,PowerNegHalf);
ChebyBoundsCheck(MdagM,Noise,param.lo,param.hi);
}
// Phidag VdagV^1/4 MdagM^-1/4 MdagM^-1/4 VdagV^1/4 Phi

View File

@ -49,10 +49,12 @@ NAMESPACE_BEGIN(Grid);
Params param;
MultiShiftFunction PowerHalf ;
MultiShiftFunction PowerNegHalf;
MultiShiftFunction PowerQuarter;
MultiShiftFunction PowerNegHalf;
MultiShiftFunction PowerNegQuarter;
MultiShiftFunction MDPowerQuarter;
MultiShiftFunction MDPowerNegHalf;
private:
FermionOperator<Impl> & NumOp;// the basic operator
@ -79,6 +81,10 @@ NAMESPACE_BEGIN(Grid);
remez.generateApprox(param.degree,1,4);
PowerQuarter.Init(remez,param.tolerance,false);
PowerNegQuarter.Init(remez,param.tolerance,true);
// Derivative solves use a different tolerance
MDPowerQuarter.Init(remez,param.mdtolerance,false);
MDPowerNegHalf.Init(remez,param.mdtolerance,true);
};
virtual std::string action_name(){return "OneFlavourRatioRationalPseudoFermionAction";}
@ -204,8 +210,8 @@ NAMESPACE_BEGIN(Grid);
virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
const int n_f = PowerNegHalf.poles.size();
const int n_pv = PowerQuarter.poles.size();
const int n_f = MDPowerNegHalf.poles.size();
const int n_pv = MDPowerQuarter.poles.size();
std::vector<FermionField> MpvPhi_k (n_pv,NumOp.FermionGrid());
std::vector<FermionField> MpvMfMpvPhi_k(n_pv,NumOp.FermionGrid());
@ -224,8 +230,8 @@ NAMESPACE_BEGIN(Grid);
MdagMLinearOperator<FermionOperator<Impl> ,FermionField> MdagM(DenOp);
MdagMLinearOperator<FermionOperator<Impl> ,FermionField> VdagV(NumOp);
ConjugateGradientMultiShift<FermionField> msCG_V(param.MaxIter,PowerQuarter);
ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,PowerNegHalf);
ConjugateGradientMultiShift<FermionField> msCG_V(param.MaxIter,MDPowerQuarter);
ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,MDPowerNegHalf);
msCG_V(VdagV,Phi,MpvPhi_k,MpvPhi);
msCG_M(MdagM,MpvPhi,MfMpvPhi_k,MfMpvPhi);
@ -244,7 +250,7 @@ NAMESPACE_BEGIN(Grid);
//(1)
for(int k=0;k<n_f;k++){
ak = PowerNegHalf.residues[k];
ak = MDPowerNegHalf.residues[k];
DenOp.M(MfMpvPhi_k[k],Y);
DenOp.MDeriv(tmp , MfMpvPhi_k[k], Y,DaggerYes ); dSdU=dSdU+ak*tmp;
DenOp.MDeriv(tmp , Y, MfMpvPhi_k[k], DaggerNo ); dSdU=dSdU+ak*tmp;
@ -254,7 +260,7 @@ NAMESPACE_BEGIN(Grid);
//(3)
for(int k=0;k<n_pv;k++){
ak = PowerQuarter.residues[k];
ak = MDPowerQuarter.residues[k];
NumOp.M(MpvPhi_k[k],Y);
NumOp.MDeriv(tmp,MpvMfMpvPhi_k[k],Y,DaggerYes); dSdU=dSdU+ak*tmp;

View File

@ -75,11 +75,15 @@ NAMESPACE_BEGIN(Grid);
conformable(_NumOp.GaugeRedBlackGrid(), _DenOp.GaugeRedBlackGrid());
};
virtual std::string action_name(){return "TwoFlavourEvenOddRatioPseudoFermionAction";}
virtual std::string action_name(){
std::stringstream sstream;
sstream<<"TwoFlavourEvenOddRatioPseudoFermionAction det("<<DenOp.Mass()<<") / det("<<NumOp.Mass()<<")";
return sstream.str();
}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "["<<action_name()<<"] has no parameters" << std::endl;
sstream<< GridLogMessage << "["<<action_name()<<"] -- No further parameters "<<std::endl;
return sstream.str();
}

View File

@ -0,0 +1,203 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/pseudofermion/TwoFlavourRatio.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
///////////////////////////////////////
// Two flavour ratio
///////////////////////////////////////
template<class Impl>
class TwoFlavourRatioEO4DPseudoFermionAction : public Action<typename Impl::GaugeField> {
public:
INHERIT_IMPL_TYPES(Impl);
private:
typedef FermionOperator<Impl> FermOp;
FermionOperator<Impl> & NumOp;// the basic operator
FermionOperator<Impl> & DenOp;// the basic operator
OperatorFunction<FermionField> &DerivativeSolver;
OperatorFunction<FermionField> &DerivativeDagSolver;
OperatorFunction<FermionField> &ActionSolver;
OperatorFunction<FermionField> &HeatbathSolver;
FermionField phi4; // the pseudo fermion field for this trajectory
public:
TwoFlavourRatioEO4DPseudoFermionAction(FermionOperator<Impl> &_NumOp,
FermionOperator<Impl> &_DenOp,
OperatorFunction<FermionField> & DS,
OperatorFunction<FermionField> & AS ) :
TwoFlavourRatioEO4DPseudoFermionAction(_NumOp,_DenOp, DS,DS,AS,AS) {};
TwoFlavourRatioEO4DPseudoFermionAction(FermionOperator<Impl> &_NumOp,
FermionOperator<Impl> &_DenOp,
OperatorFunction<FermionField> & DS,
OperatorFunction<FermionField> & DDS,
OperatorFunction<FermionField> & AS,
OperatorFunction<FermionField> & HS
) : NumOp(_NumOp),
DenOp(_DenOp),
DerivativeSolver(DS),
DerivativeDagSolver(DDS),
ActionSolver(AS),
HeatbathSolver(HS),
phi4(_NumOp.GaugeGrid())
{};
virtual std::string action_name(){return "TwoFlavourRatioEO4DPseudoFermionAction";}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "["<<action_name()<<"] has no parameters" << std::endl;
return sstream.str();
}
virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) {
// P(phi) = e^{- phi^dag (V^dag M^-dag)_11 (M^-1 V)_11 phi}
//
// NumOp == V
// DenOp == M
//
// Take phi = (V^{-1} M)_11 eta ; eta = (M^{-1} V)_11 Phi
//
// P(eta) = e^{- eta^dag eta}
//
// Compare with e^{-x^2/(2 sig^2)} : sig^2 = 0.5.
//
// So eta should have width sig = 1/sqrt(2), i.e. scale a unit Gaussian by 0.707...
//
RealD scale = std::sqrt(0.5);
FermionField eta4(NumOp.GaugeGrid());
FermionField eta5(NumOp.FermionGrid());
FermionField tmp(NumOp.FermionGrid());
FermionField phi5(NumOp.FermionGrid());
gaussian(pRNG,eta4);
NumOp.ImportFourDimPseudoFermion(eta4,eta5);
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
SchurRedBlackDiagMooeeSolve<FermionField> PrecSolve(HeatbathSolver);
DenOp.M(eta5,tmp); // M eta
PrecSolve(NumOp,tmp,phi5); // phi = V^-1 M eta
phi5=phi5*scale;
std::cout << GridLogMessage << "4d pf refresh "<< norm2(phi5)<<"\n";
// Project to 4d
NumOp.ExportFourDimPseudoFermion(phi5,phi4);
};
//////////////////////////////////////////////////////
// S = phi^dag (V^dag M^-dag)_11 (M^-1 V)_11 phi
//////////////////////////////////////////////////////
virtual RealD S(const GaugeField &U) {
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
FermionField Y4(NumOp.GaugeGrid());
FermionField X(NumOp.FermionGrid());
FermionField Y(NumOp.FermionGrid());
FermionField phi5(NumOp.FermionGrid());
MdagMLinearOperator<FermionOperator<Impl> ,FermionField> MdagMOp(DenOp);
SchurRedBlackDiagMooeeSolve<FermionField> PrecSolve(ActionSolver);
NumOp.ImportFourDimPseudoFermion(phi4,phi5);
NumOp.M(phi5,X); // X= V phi
PrecSolve(DenOp,X,Y); // Y= (MdagM)^-1 Mdag Vdag phi = M^-1 V phi
NumOp.ExportFourDimPseudoFermion(Y,Y4);
RealD action = norm2(Y4);
return action;
};
//////////////////////////////////////////////////////
// dS/du = 2 Re phi^dag (V^dag M^-dag)_11 (M^-1 d V)_11 phi
// - 2 Re phi^dag (dV^dag M^-dag)_11 (M^-1 dM M^-1 V)_11 phi
//////////////////////////////////////////////////////
virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
NumOp.ImportGauge(U);
DenOp.ImportGauge(U);
FermionField X(NumOp.FermionGrid());
FermionField Y(NumOp.FermionGrid());
FermionField phi(NumOp.FermionGrid());
FermionField Vphi(NumOp.FermionGrid());
FermionField MinvVphi(NumOp.FermionGrid());
FermionField tmp4(NumOp.GaugeGrid());
FermionField MdagInvMinvVphi(NumOp.FermionGrid());
GaugeField force(NumOp.GaugeGrid());
// Vphi     = V phi
// MinvVphi = (Mdag M)^-1 Mdag V phi = M^-1 V phi  (via the red-black normal-equation solve)
NumOp.ImportFourDimPseudoFermion(phi4,phi);
NumOp.M(phi,Vphi); // V phi
SchurRedBlackDiagMooeeSolve<FermionField> PrecSolve(DerivativeSolver);
PrecSolve(DenOp,Vphi,MinvVphi);// M^-1 V phi
std::cout << GridLogMessage << "4d deriv solve "<< norm2(MinvVphi)<<"\n";
// Projects onto the physical space and back
NumOp.ExportFourDimPseudoFermion(MinvVphi,tmp4);
NumOp.ImportFourDimPseudoFermion(tmp4,Y);
SchurRedBlackDiagMooeeDagSolve<FermionField> PrecDagSolve(DerivativeDagSolver);
// X = proj M^-dag V phi
// Need an adjoint solve
PrecDagSolve(DenOp,Y,MdagInvMinvVphi);
std::cout << GridLogMessage << "4d deriv solve dag "<< norm2(MdagInvMinvVphi)<<"\n";
// phi^dag (Vdag Mdag^-1) (M^-1 dV) phi
NumOp.MDeriv(force ,MdagInvMinvVphi , phi, DaggerNo ); dSdU=force;
// phi^dag (dVdag Mdag^-1) (M^-1 V) phi
NumOp.MDeriv(force , phi, MdagInvMinvVphi ,DaggerYes ); dSdU=dSdU+force;
// - 2 Re phi^dag (dV^dag M^-dag)_11 (M^-1 dM M^-1 V)_11 phi
DenOp.MDeriv(force,MdagInvMinvVphi,MinvVphi,DaggerNo); dSdU=dSdU-force;
DenOp.MDeriv(force,MinvVphi,MdagInvMinvVphi,DaggerYes); dSdU=dSdU-force;
dSdU *= -1.0;
//dSdU = - Ta(dSdU);
};
};
NAMESPACE_END(Grid);
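
In compact form (restating the comments above; (.)_{11} is the projection onto the physical four-dimensional surface implemented by Import/ExportFourDimPseudoFermion, NumOp = V, DenOp = M):

\[
S[\phi_4]=\phi_4^\dagger\,\big(V^\dagger M^{-\dagger}\big)_{11}\big(M^{-1}V\big)_{11}\,\phi_4,
\qquad
\phi_4=\big(V^{-1}M\big)_{11}\,\eta_4,\quad \eta_4\sim e^{-\eta_4^\dagger\eta_4},
\]

which is exactly the sequence in refresh(): promote \eta_4 to five dimensions, apply M, solve against V, scale by \sqrt{0.5} and project back to the surface.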

View File

@ -0,0 +1,6 @@
#ifndef GRID_GPARITY_H_
#define GRID_GPARITY_H_
#include<Grid/qcd/gparity/GparityFlavour.h>
#endif

View File

@ -0,0 +1,34 @@
#include <Grid/Grid.h>
NAMESPACE_BEGIN(Grid);
const std::array<const GparityFlavour, 3> GparityFlavour::sigma_mu = {{
GparityFlavour(GparityFlavour::Algebra::SigmaX),
GparityFlavour(GparityFlavour::Algebra::SigmaY),
GparityFlavour(GparityFlavour::Algebra::SigmaZ)
}};
const std::array<const GparityFlavour, 6> GparityFlavour::sigma_all = {{
GparityFlavour(GparityFlavour::Algebra::Identity),
GparityFlavour(GparityFlavour::Algebra::SigmaX),
GparityFlavour(GparityFlavour::Algebra::SigmaY),
GparityFlavour(GparityFlavour::Algebra::SigmaZ),
GparityFlavour(GparityFlavour::Algebra::ProjPlus),
GparityFlavour(GparityFlavour::Algebra::ProjMinus)
}};
const std::array<const char *, GparityFlavour::nSigma> GparityFlavour::name = {{
"SigmaX",
"MinusSigmaX",
"SigmaY",
"MinusSigmaY",
"SigmaZ",
"MinusSigmaZ",
"Identity",
"MinusIdentity",
"ProjPlus",
"MinusProjPlus",
"ProjMinus",
"MinusProjMinus"}};
NAMESPACE_END(Grid);

View File

@ -0,0 +1,475 @@
#ifndef GRID_QCD_GPARITY_FLAVOUR_H
#define GRID_QCD_GPARITY_FLAVOUR_H
//Support for flavour-matrix operations acting on the G-parity flavour index
#include <array>
NAMESPACE_BEGIN(Grid);
class GparityFlavour {
public:
GRID_SERIALIZABLE_ENUM(Algebra, undef,
SigmaX, 0,
MinusSigmaX, 1,
SigmaY, 2,
MinusSigmaY, 3,
SigmaZ, 4,
MinusSigmaZ, 5,
Identity, 6,
MinusIdentity, 7,
ProjPlus, 8,
MinusProjPlus, 9,
ProjMinus, 10,
MinusProjMinus, 11
);
static constexpr unsigned int nSigma = 12;
static const std::array<const char *, nSigma> name;
static const std::array<const GparityFlavour, 3> sigma_mu;
static const std::array<const GparityFlavour, 6> sigma_all;
Algebra g;
public:
accelerator GparityFlavour(Algebra initg): g(initg) {}
};
// 0 1 x vector
// 1 0
template<class vtype>
accelerator_inline void multFlavourSigmaX(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = rhs(1);
ret(1) = rhs(0);
};
template<class vtype>
accelerator_inline void lmultFlavourSigmaX(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = rhs(1,0);
ret(0,1) = rhs(1,1);
ret(1,0) = rhs(0,0);
ret(1,1) = rhs(0,1);
};
template<class vtype>
accelerator_inline void rmultFlavourSigmaX(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = rhs(0,1);
ret(0,1) = rhs(0,0);
ret(1,0) = rhs(1,1);
ret(1,1) = rhs(1,0);
};
template<class vtype>
accelerator_inline void multFlavourMinusSigmaX(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = -rhs(1);
ret(1) = -rhs(0);
};
template<class vtype>
accelerator_inline void lmultFlavourMinusSigmaX(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -rhs(1,0);
ret(0,1) = -rhs(1,1);
ret(1,0) = -rhs(0,0);
ret(1,1) = -rhs(0,1);
};
template<class vtype>
accelerator_inline void rmultFlavourMinusSigmaX(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -rhs(0,1);
ret(0,1) = -rhs(0,0);
ret(1,0) = -rhs(1,1);
ret(1,1) = -rhs(1,0);
};
// 0 -i x vector
// i 0
template<class vtype>
accelerator_inline void multFlavourSigmaY(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = timesMinusI(rhs(1));
ret(1) = timesI(rhs(0));
};
template<class vtype>
accelerator_inline void lmultFlavourSigmaY(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = timesMinusI(rhs(1,0));
ret(0,1) = timesMinusI(rhs(1,1));
ret(1,0) = timesI(rhs(0,0));
ret(1,1) = timesI(rhs(0,1));
};
template<class vtype>
accelerator_inline void rmultFlavourSigmaY(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = timesI(rhs(0,1));
ret(0,1) = timesMinusI(rhs(0,0));
ret(1,0) = timesI(rhs(1,1));
ret(1,1) = timesMinusI(rhs(1,0));
};
template<class vtype>
accelerator_inline void multFlavourMinusSigmaY(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = timesI(rhs(1));
ret(1) = timesMinusI(rhs(0));
};
template<class vtype>
accelerator_inline void lmultFlavourMinusSigmaY(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = timesI(rhs(1,0));
ret(0,1) = timesI(rhs(1,1));
ret(1,0) = timesMinusI(rhs(0,0));
ret(1,1) = timesMinusI(rhs(0,1));
};
template<class vtype>
accelerator_inline void rmultFlavourMinusSigmaY(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = timesMinusI(rhs(0,1));
ret(0,1) = timesI(rhs(0,0));
ret(1,0) = timesMinusI(rhs(1,1));
ret(1,1) = timesI(rhs(1,0));
};
// 1 0 x vector
// 0 -1
template<class vtype>
accelerator_inline void multFlavourSigmaZ(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = rhs(0);
ret(1) = -rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourSigmaZ(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = rhs(0,0);
ret(0,1) = rhs(0,1);
ret(1,0) = -rhs(1,0);
ret(1,1) = -rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourSigmaZ(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = rhs(0,0);
ret(0,1) = -rhs(0,1);
ret(1,0) = rhs(1,0);
ret(1,1) = -rhs(1,1);
};
template<class vtype>
accelerator_inline void multFlavourMinusSigmaZ(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = -rhs(0);
ret(1) = rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourMinusSigmaZ(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -rhs(0,0);
ret(0,1) = -rhs(0,1);
ret(1,0) = rhs(1,0);
ret(1,1) = rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourMinusSigmaZ(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -rhs(0,0);
ret(0,1) = rhs(0,1);
ret(1,0) = -rhs(1,0);
ret(1,1) = rhs(1,1);
};
template<class vtype>
accelerator_inline void multFlavourIdentity(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = rhs(0);
ret(1) = rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourIdentity(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = rhs(0,0);
ret(0,1) = rhs(0,1);
ret(1,0) = rhs(1,0);
ret(1,1) = rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourIdentity(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = rhs(0,0);
ret(0,1) = rhs(0,1);
ret(1,0) = rhs(1,0);
ret(1,1) = rhs(1,1);
};
template<class vtype>
accelerator_inline void multFlavourMinusIdentity(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = -rhs(0);
ret(1) = -rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourMinusIdentity(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -rhs(0,0);
ret(0,1) = -rhs(0,1);
ret(1,0) = -rhs(1,0);
ret(1,1) = -rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourMinusIdentity(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -rhs(0,0);
ret(0,1) = -rhs(0,1);
ret(1,0) = -rhs(1,0);
ret(1,1) = -rhs(1,1);
};
//G-parity flavour projection 1/2(1+\sigma_2)
//1 -i
//i 1
template<class vtype>
accelerator_inline void multFlavourProjPlus(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = 0.5*rhs(0) + 0.5*timesMinusI(rhs(1));
ret(1) = 0.5*timesI(rhs(0)) + 0.5*rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourProjPlus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = 0.5*rhs(0,0) + 0.5*timesMinusI(rhs(1,0));
ret(0,1) = 0.5*rhs(0,1) + 0.5*timesMinusI(rhs(1,1));
ret(1,0) = 0.5*timesI(rhs(0,0)) + 0.5*rhs(1,0);
ret(1,1) = 0.5*timesI(rhs(0,1)) + 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourProjPlus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = 0.5*rhs(0,0) + 0.5*timesI(rhs(0,1));
ret(0,1) = 0.5*timesMinusI(rhs(0,0)) + 0.5*rhs(0,1);
ret(1,0) = 0.5*rhs(1,0) + 0.5*timesI(rhs(1,1));
ret(1,1) = 0.5*timesMinusI(rhs(1,0)) + 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline void multFlavourMinusProjPlus(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = -0.5*rhs(0) + 0.5*timesI(rhs(1));
ret(1) = 0.5*timesMinusI(rhs(0)) - 0.5*rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourMinusProjPlus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -0.5*rhs(0,0) + 0.5*timesI(rhs(1,0));
ret(0,1) = -0.5*rhs(0,1) + 0.5*timesI(rhs(1,1));
ret(1,0) = 0.5*timesMinusI(rhs(0,0)) - 0.5*rhs(1,0);
ret(1,1) = 0.5*timesMinusI(rhs(0,1)) - 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourMinusProjPlus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -0.5*rhs(0,0) + 0.5*timesMinusI(rhs(0,1));
ret(0,1) = 0.5*timesI(rhs(0,0)) - 0.5*rhs(0,1);
ret(1,0) = -0.5*rhs(1,0) + 0.5*timesMinusI(rhs(1,1));
ret(1,1) = 0.5*timesI(rhs(1,0)) - 0.5*rhs(1,1);
};
//G-parity flavour projection 1/2(1-\sigma_2)
//1 i
//-i 1
template<class vtype>
accelerator_inline void multFlavourProjMinus(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = 0.5*rhs(0) + 0.5*timesI(rhs(1));
ret(1) = 0.5*timesMinusI(rhs(0)) + 0.5*rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourProjMinus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = 0.5*rhs(0,0) + 0.5*timesI(rhs(1,0));
ret(0,1) = 0.5*rhs(0,1) + 0.5*timesI(rhs(1,1));
ret(1,0) = 0.5*timesMinusI(rhs(0,0)) + 0.5*rhs(1,0);
ret(1,1) = 0.5*timesMinusI(rhs(0,1)) + 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourProjMinus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = 0.5*rhs(0,0) + 0.5*timesMinusI(rhs(0,1));
ret(0,1) = 0.5*timesI(rhs(0,0)) + 0.5*rhs(0,1);
ret(1,0) = 0.5*rhs(1,0) + 0.5*timesMinusI(rhs(1,1));
ret(1,1) = 0.5*timesI(rhs(1,0)) + 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline void multFlavourMinusProjMinus(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
{
ret(0) = -0.5*rhs(0) + 0.5*timesMinusI(rhs(1));
ret(1) = 0.5*timesI(rhs(0)) - 0.5*rhs(1);
};
template<class vtype>
accelerator_inline void lmultFlavourMinusProjMinus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -0.5*rhs(0,0) + 0.5*timesMinusI(rhs(1,0));
ret(0,1) = -0.5*rhs(0,1) + 0.5*timesMinusI(rhs(1,1));
ret(1,0) = 0.5*timesI(rhs(0,0)) - 0.5*rhs(1,0);
ret(1,1) = 0.5*timesI(rhs(0,1)) - 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline void rmultFlavourMinusProjMinus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
{
ret(0,0) = -0.5*rhs(0,0) + 0.5*timesI(rhs(0,1));
ret(0,1) = 0.5*timesMinusI(rhs(0,0)) - 0.5*rhs(0,1);
ret(1,0) = -0.5*rhs(1,0) + 0.5*timesI(rhs(1,1));
ret(1,1) = 0.5*timesMinusI(rhs(1,0)) - 0.5*rhs(1,1);
};
template<class vtype>
accelerator_inline auto operator*(const GparityFlavour &G, const iVector<vtype, Ngp> &arg)
->typename std::enable_if<matchGridTensorIndex<iVector<vtype, Ngp>, GparityFlavourTensorIndex>::value, iVector<vtype, Ngp>>::type
{
iVector<vtype, Ngp> ret;
switch (G.g)
{
case GparityFlavour::Algebra::SigmaX:
multFlavourSigmaX(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaX:
multFlavourMinusSigmaX(ret, arg); break;
case GparityFlavour::Algebra::SigmaY:
multFlavourSigmaY(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaY:
multFlavourMinusSigmaY(ret, arg); break;
case GparityFlavour::Algebra::SigmaZ:
multFlavourSigmaZ(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaZ:
multFlavourMinusSigmaZ(ret, arg); break;
case GparityFlavour::Algebra::Identity:
multFlavourIdentity(ret, arg); break;
case GparityFlavour::Algebra::MinusIdentity:
multFlavourMinusIdentity(ret, arg); break;
case GparityFlavour::Algebra::ProjPlus:
multFlavourProjPlus(ret, arg); break;
case GparityFlavour::Algebra::MinusProjPlus:
multFlavourMinusProjPlus(ret, arg); break;
case GparityFlavour::Algebra::ProjMinus:
multFlavourProjMinus(ret, arg); break;
case GparityFlavour::Algebra::MinusProjMinus:
multFlavourMinusProjMinus(ret, arg); break;
default: assert(0);
}
return ret;
}
template<class vtype>
accelerator_inline auto operator*(const GparityFlavour &G, const iMatrix<vtype, Ngp> &arg)
->typename std::enable_if<matchGridTensorIndex<iMatrix<vtype, Ngp>, GparityFlavourTensorIndex>::value, iMatrix<vtype, Ngp>>::type
{
iMatrix<vtype, Ngp> ret;
switch (G.g)
{
case GparityFlavour::Algebra::SigmaX:
lmultFlavourSigmaX(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaX:
lmultFlavourMinusSigmaX(ret, arg); break;
case GparityFlavour::Algebra::SigmaY:
lmultFlavourSigmaY(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaY:
lmultFlavourMinusSigmaY(ret, arg); break;
case GparityFlavour::Algebra::SigmaZ:
lmultFlavourSigmaZ(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaZ:
lmultFlavourMinusSigmaZ(ret, arg); break;
case GparityFlavour::Algebra::Identity:
lmultFlavourIdentity(ret, arg); break;
case GparityFlavour::Algebra::MinusIdentity:
lmultFlavourMinusIdentity(ret, arg); break;
case GparityFlavour::Algebra::ProjPlus:
lmultFlavourProjPlus(ret, arg); break;
case GparityFlavour::Algebra::MinusProjPlus:
lmultFlavourMinusProjPlus(ret, arg); break;
case GparityFlavour::Algebra::ProjMinus:
lmultFlavourProjMinus(ret, arg); break;
case GparityFlavour::Algebra::MinusProjMinus:
lmultFlavourMinusProjMinus(ret, arg); break;
default: assert(0);
}
return ret;
}
template<class vtype>
accelerator_inline auto operator*(const iMatrix<vtype, Ngp> &arg, const GparityFlavour &G)
->typename std::enable_if<matchGridTensorIndex<iMatrix<vtype, Ngp>, GparityFlavourTensorIndex>::value, iMatrix<vtype, Ngp>>::type
{
iMatrix<vtype, Ngp> ret;
switch (G.g)
{
case GparityFlavour::Algebra::SigmaX:
rmultFlavourSigmaX(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaX:
rmultFlavourMinusSigmaX(ret, arg); break;
case GparityFlavour::Algebra::SigmaY:
rmultFlavourSigmaY(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaY:
rmultFlavourMinusSigmaY(ret, arg); break;
case GparityFlavour::Algebra::SigmaZ:
rmultFlavourSigmaZ(ret, arg); break;
case GparityFlavour::Algebra::MinusSigmaZ:
rmultFlavourMinusSigmaZ(ret, arg); break;
case GparityFlavour::Algebra::Identity:
rmultFlavourIdentity(ret, arg); break;
case GparityFlavour::Algebra::MinusIdentity:
rmultFlavourMinusIdentity(ret, arg); break;
case GparityFlavour::Algebra::ProjPlus:
rmultFlavourProjPlus(ret, arg); break;
case GparityFlavour::Algebra::MinusProjPlus:
rmultFlavourMinusProjPlus(ret, arg); break;
case GparityFlavour::Algebra::ProjMinus:
rmultFlavourProjMinus(ret, arg); break;
case GparityFlavour::Algebra::MinusProjMinus:
rmultFlavourMinusProjMinus(ret, arg); break;
default: assert(0);
}
return ret;
}
NAMESPACE_END(Grid);
#endif // include guard
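
For reference, the 2x2 flavour matrices implemented entrywise above (Ngp = 2), with \sigma_2 the matrix entering the G-parity projectors:

\[
\sigma_1=\begin{pmatrix}0&1\\1&0\end{pmatrix},\quad
\sigma_2=\begin{pmatrix}0&-i\\ i&0\end{pmatrix},\quad
\sigma_3=\begin{pmatrix}1&0\\0&-1\end{pmatrix},\quad
P_\pm=\tfrac{1}{2}\big(1\pm\sigma_2\big),
\]

satisfying P_\pm^2 = P_\pm, P_+P_- = 0 and P_+ + P_- = 1, which is what the ProjPlus/ProjMinus mult routines encode.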

View File

@ -129,18 +129,10 @@ public:
Runner(S);
}
//////////////////////////////////////////////////////////////////
private:
template <class SmearingPolicy>
void Runner(SmearingPolicy &Smearing) {
auto UGrid = Resources.GetCartesian();
Resources.AddRNGs();
Field U(UGrid);
// Can move this outside?
typedef IntegratorType<SmearingPolicy> TheIntegrator;
TheIntegrator MDynamics(UGrid, Parameters.MD, TheAction, Smearing);
//Use the checkpointer to initialize the RNGs and the gauge field, writing the resulting gauge field into U.
//This is called automatically by Run but may be useful elsewhere, e.g. for integrator tuning experiments
void initializeGaugeFieldAndRNGs(Field &U){
if(!Resources.haveRNGs()) Resources.AddRNGs();
if (Parameters.StartingType == "HotStart") {
// Hot start
@ -167,6 +159,25 @@ private:
<< "Valid [HotStart, ColdStart, TepidStart, CheckpointStart]\n";
exit(1);
}
}
//////////////////////////////////////////////////////////////////
private:
template <class SmearingPolicy>
void Runner(SmearingPolicy &Smearing) {
auto UGrid = Resources.GetCartesian();
Field U(UGrid);
initializeGaugeFieldAndRNGs(U);
typedef IntegratorType<SmearingPolicy> TheIntegrator;
TheIntegrator MDynamics(UGrid, Parameters.MD, TheAction, Smearing);
// Sets the momentum filter
MDynamics.setMomentumFilter(*(Resources.GetMomentumFilter()));
Smearing.set_Field(U);

View File

@ -34,6 +34,7 @@ directory
* @brief Classes for Hybrid Monte Carlo update
*
* @author Guido Cossu
* @author Peter Boyle
*/
//--------------------------------------------------------------------
#pragma once
@ -115,22 +116,17 @@ private:
random(sRNG, rn_test);
std::cout << GridLogMessage
<< "--------------------------------------------------\n";
std::cout << GridLogMessage << "exp(-dH) = " << prob
<< " Random = " << rn_test << "\n";
std::cout << GridLogMessage
<< "Acc. Probability = " << ((prob < 1.0) ? prob : 1.0) << "\n";
std::cout << GridLogHMC << "--------------------------------------------------\n";
std::cout << GridLogHMC << "exp(-dH) = " << prob << " Random = " << rn_test << "\n";
std::cout << GridLogHMC << "Acc. Probability = " << ((prob < 1.0) ? prob : 1.0) << "\n";
if ((prob > 1.0) || (rn_test <= prob)) { // accepted
std::cout << GridLogMessage << "Metropolis_test -- ACCEPTED\n";
std::cout << GridLogMessage
<< "--------------------------------------------------\n";
std::cout << GridLogHMC << "Metropolis_test -- ACCEPTED\n";
std::cout << GridLogHMC << "--------------------------------------------------\n";
return true;
} else { // rejected
std::cout << GridLogMessage << "Metropolis_test -- REJECTED\n";
std::cout << GridLogMessage
<< "--------------------------------------------------\n";
std::cout << GridLogHMC << "Metropolis_test -- REJECTED\n";
std::cout << GridLogHMC << "--------------------------------------------------\n";
return false;
}
}
@ -139,18 +135,67 @@ private:
// Evolution
/////////////////////////////////////////////////////////
RealD evolve_hmc_step(Field &U) {
TheIntegrator.refresh(U, sRNG, pRNG); // set U and initialize P and phi's
RealD H0 = TheIntegrator.S(U); // initial state action
GridBase *Grid = U.Grid();
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Mainly for DDHMC: perform a random translation of U, modulo the lattice volume
//////////////////////////////////////////////////////////////////////////////////////////////////////
std::cout << GridLogMessage << "--------------------------------------------------\n";
std::cout << GridLogMessage << "Random shifting gauge field by [";
for(int d=0;d<Grid->Nd();d++) {
int L = Grid->GlobalDimensions()[d];
RealD rn_uniform; random(sRNG, rn_uniform);
int shift = (int) (rn_uniform*L);
std::cout << shift;
if(d<Grid->Nd()-1) std::cout <<",";
else std::cout <<"]\n";
U = Cshift(U,d,shift);
}
std::cout << GridLogMessage << "--------------------------------------------------\n";
TheIntegrator.reset_timer();
//////////////////////////////////////////////////////////////////////////////////////////////////////
// set U and initialize P and phi's
//////////////////////////////////////////////////////////////////////////////////////////////////////
std::cout << GridLogMessage << "--------------------------------------------------\n";
std::cout << GridLogMessage << "Refresh momenta and pseudofermions";
TheIntegrator.refresh(U, sRNG, pRNG);
std::cout << GridLogMessage << "--------------------------------------------------\n";
//////////////////////////////////////////////////////////////////////////////////////////////////////
// initial state action
//////////////////////////////////////////////////////////////////////////////////////////////////////
std::cout << GridLogMessage << "--------------------------------------------------\n";
std::cout << GridLogMessage << "Compute initial action";
RealD H0 = TheIntegrator.S(U);
std::cout << GridLogMessage << "--------------------------------------------------\n";
std::streamsize current_precision = std::cout.precision();
std::cout.precision(15);
std::cout << GridLogMessage << "Total H before trajectory = " << H0 << "\n";
std::cout << GridLogHMC << "Total H before trajectory = " << H0 << "\n";
std::cout.precision(current_precision);
std::cout << GridLogMessage << "--------------------------------------------------\n";
std::cout << GridLogMessage << " Molecular Dynamics evolution ";
TheIntegrator.integrate(U);
std::cout << GridLogMessage << "--------------------------------------------------\n";
//////////////////////////////////////////////////////////////////////////////////////////////////////
// updated state action
//////////////////////////////////////////////////////////////////////////////////////////////////////
std::cout << GridLogMessage << "--------------------------------------------------\n";
std::cout << GridLogMessage << "Compute final action";
RealD H1 = TheIntegrator.S(U);
std::cout << GridLogMessage << "--------------------------------------------------\n";
RealD H1 = TheIntegrator.S(U); // updated state action
///////////////////////////////////////////////////////////
if(0){
@ -163,18 +208,17 @@ private:
}
///////////////////////////////////////////////////////////
std::cout.precision(15);
std::cout << GridLogMessage << "Total H after trajectory = " << H1
<< " dH = " << H1 - H0 << "\n";
std::cout << GridLogHMC << "--------------------------------------------------\n";
std::cout << GridLogHMC << "Total H after trajectory = " << H1 << " dH = " << H1 - H0 << "\n";
std::cout << GridLogHMC << "--------------------------------------------------\n";
std::cout.precision(current_precision);
return (H1 - H0);
}
public:
/////////////////////////////////////////
// Constructor
@ -195,10 +239,13 @@ public:
// Actual updates (evolve a copy Ucopy then copy back eventually)
unsigned int FinalTrajectory = Params.Trajectories + Params.NoMetropolisUntil + Params.StartTrajectory;
for (int traj = Params.StartTrajectory; traj < FinalTrajectory; ++traj) {
std::cout << GridLogMessage << "-- # Trajectory = " << traj << "\n";
std::cout << GridLogHMC << "-- # Trajectory = " << traj << "\n";
if (traj < Params.StartTrajectory + Params.NoMetropolisUntil) {
std::cout << GridLogMessage << "-- Thermalization" << std::endl;
std::cout << GridLogHMC << "-- Thermalization" << std::endl;
}
double t0=usecond();
@ -207,20 +254,19 @@ public:
DeltaH = evolve_hmc_step(Ucopy);
// Metropolis-Hastings test
bool accept = true;
if (traj >= Params.StartTrajectory + Params.NoMetropolisUntil) {
if (Params.MetropolisTest && traj >= Params.StartTrajectory + Params.NoMetropolisUntil) {
accept = metropolis_test(DeltaH);
} else {
std::cout << GridLogMessage << "Skipping Metropolis test" << std::endl;
std::cout << GridLogHMC << "Skipping Metropolis test" << std::endl;
}
if (accept)
Ucur = Ucopy;
double t1=usecond();
std::cout << GridLogMessage << "Total time for trajectory (s): " << (t1-t0)/1e6 << std::endl;
std::cout << GridLogHMC << "Total time for trajectory (s): " << (t1-t0)/1e6 << std::endl;
TheIntegrator.print_timer();
for (int obs = 0; obs < Observables.size(); obs++) {
std::cout << GridLogDebug << "Observables # " << obs << std::endl;
@ -228,7 +274,7 @@ public:
std::cout << GridLogDebug << "Observables pointer " << Observables[obs] << std::endl;
Observables[obs]->TrajectoryComplete(traj + 1, Ucur, sRNG, pRNG);
}
std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::::" << std::endl;
std::cout << GridLogHMC << ":::::::::::::::::::::::::::::::::::::::::::" << std::endl;
}
}
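
A note on the new random shift at the start of evolve_hmc_step: it presumably relies on the exact translation invariance of the periodic lattice action,

\[
S[U'] = S[U],\qquad U'_\mu(x)\equiv U_\mu(x+a)\ \ \text{for any offset } a,
\]

so Cshift-ing the gauge field by a random offset before the momentum and pseudofermion refresh leaves the target distribution unchanged, while moving the fixed DDHMC domain boundaries to a new physical location each trajectory so that no link is permanently frozen by the momentum filter.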

View File

@ -72,6 +72,8 @@ class HMCResourceManager {
typedef HMCModuleBase< BaseHmcCheckpointer<ImplementationPolicy> > CheckpointerBaseModule;
typedef HMCModuleBase< HmcObservable<typename ImplementationPolicy::Field> > ObservableBaseModule;
typedef ActionModuleBase< Action<typename ImplementationPolicy::Field>, GridModule > ActionBaseModule;
typedef typename ImplementationPolicy::Field MomentaField;
typedef typename ImplementationPolicy::Field Field;
// Named storage for grid pairs (std + red-black)
std::unordered_map<std::string, GridModule> Grids;
@ -80,6 +82,9 @@ class HMCResourceManager {
// SmearingModule<ImplementationPolicy> Smearing;
std::unique_ptr<CheckpointerBaseModule> CP;
// Momentum filter
std::unique_ptr<MomentumFilterBase<typename ImplementationPolicy::Field> > Filter;
// A vector of HmcObservable modules
std::vector<std::unique_ptr<ObservableBaseModule> > ObservablesList;
@ -90,6 +95,7 @@ class HMCResourceManager {
bool have_RNG;
bool have_CheckPointer;
bool have_Filter;
// NOTE: operator << is not overloaded for std::vector<string>
// so this function is necessary
@ -101,7 +107,7 @@ class HMCResourceManager {
public:
HMCResourceManager() : have_RNG(false), have_CheckPointer(false) {}
HMCResourceManager() : have_RNG(false), have_CheckPointer(false), have_Filter(false) {}
template <class ReaderClass, class vector_type = vComplex >
void initialize(ReaderClass &Read){
@ -129,6 +135,7 @@ public:
RNGModuleParameters RNGpar(Read);
SetRNGSeeds(RNGpar);
// Observables
auto &ObsFactory = HMC_ObservablesModuleFactory<observable_string, typename ImplementationPolicy::Field, ReaderClass>::getInstance();
Read.push(observable_string);// here must check if existing...
@ -208,6 +215,16 @@ public:
AddGrid(s, Mod);
}
void SetMomentumFilter( MomentumFilterBase<typename ImplementationPolicy::Field> * MomFilter) {
assert(have_Filter==false);
Filter = std::unique_ptr<MomentumFilterBase<typename ImplementationPolicy::Field> >(MomFilter);
have_Filter = true;
}
MomentumFilterBase<typename ImplementationPolicy::Field> *GetMomentumFilter(void) {
if ( !have_Filter)
SetMomentumFilter(new MomentumFilterNone<typename ImplementationPolicy::Field>());
return Filter.get();
}
GridCartesian* GetCartesian(std::string s = "") {
if (s.empty()) s = Grids.begin()->first;
@ -227,6 +244,9 @@ public:
// Random number generators
//////////////////////////////////////////////////////
//Return true if the RNG objects have been instantiated
bool haveRNGs() const{ return have_RNG; }
void AddRNGs(std::string s = "") {
// Couple the RNGs to the GridModule tagged by s
// the default is the first grid registered

View File

@ -33,7 +33,6 @@ directory
#define INTEGRATOR_INCLUDED
#include <memory>
#include "MomentumFilter.h"
NAMESPACE_BEGIN(Grid);
@ -67,6 +66,7 @@ public:
template <class FieldImplementation, class SmearingPolicy, class RepresentationPolicy>
class Integrator {
protected:
typedef typename FieldImplementation::Field MomentaField; //for readability
typedef typename FieldImplementation::Field Field;
@ -119,36 +119,58 @@ protected:
}
} update_P_hireps{};
void update_P(MomentaField& Mom, Field& U, int level, double ep) {
// input U actually not used in the fundamental case
// Fundamental updates, including smearing
for (int a = 0; a < as[level].actions.size(); ++a) {
double start_full = usecond();
Field force(U.Grid());
conformable(U.Grid(), Mom.Grid());
Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared);
double start_force = usecond();
as[level].actions.at(a)->deriv_timer_start();
as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta
as[level].actions.at(a)->deriv_timer_stop();
std::cout << GridLogIntegrator << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared << std::endl;
auto name = as[level].actions.at(a)->action_name();
if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force);
force = FieldImplementation::projectForce(force); // Ta for gauge fields
double end_force = usecond();
Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites());
std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] Force average: " << force_abs << std::endl;
MomFilter->applyFilter(force);
std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<<a <<"] "<<name<< std::endl;
// DumpSliceNorm("force ",force,Nd-1);
Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x])
Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;
Real force_max = std::sqrt(maxLocalNorm2(force));
Real impulse_max = force_max * ep * HMC_MOMENTUM_DENOMINATOR;
as[level].actions.at(a)->deriv_log(force_abs,force_max);
std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Force average: " << force_abs <<" "<<name<<std::endl;
std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Force max : " << force_max <<" "<<name<<std::endl;
std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Fdt average : " << impulse_abs <<" "<<name<<std::endl;
std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Fdt max : " << impulse_max <<" "<<name<<std::endl;
Mom -= force * ep * HMC_MOMENTUM_DENOMINATOR;
double end_full = usecond();
double time_full = (end_full - start_full) / 1e3;
double time_force = (end_force - start_force) / 1e3;
std::cout << GridLogMessage << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)" << std::endl;
}
// Force from the other representations
as[level].apply(update_P_hireps, Representations, Mom, U, ep);
MomFilter->applyFilter(Mom);
}
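// Relations among the quantities logged in update_P above (per action, per leg):
//   force_abs   = sqrt( norm2(F) / V )                   average per-site force norm
//   force_max   = sqrt( maxLocalNorm2(F) )               worst single site
//   impulse     = force * ep * HMC_MOMENTUM_DENOMINATOR  the kick actually applied
// and the momentum update is P <- P - F * ep * HMC_MOMENTUM_DENOMINATOR, with the
// momentum filter applied both to each force and to the summed momentum, so links
// frozen by a Dirichlet/DDHMC filter receive no update.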
void update_U(Field& U, double ep)
@ -162,8 +184,12 @@ protected:
void update_U(MomentaField& Mom, Field& U, double ep)
{
MomentaField MomFiltered(Mom.Grid());
MomFiltered = Mom;
MomFilter->applyFilter(MomFiltered);
// exponential of Mom*U in the gauge fields case
FieldImplementation::update_field(Mom, U, ep);
FieldImplementation::update_field(MomFiltered, U, ep);
// Update the smeared fields, can be implemented as observer
Smearer.set_Field(U);
@ -206,6 +232,66 @@ public:
const MomentaField & getMomentum() const{ return P; }
void reset_timer(void)
{
for (int level = 0; level < as.size(); ++level) {
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
as[level].actions.at(actionID)->reset_timer();
}
}
}
void print_timer(void)
{
std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::" << std::endl;
std::cout << GridLogMessage << " Refresh cumulative timings "<<std::endl;
std::cout << GridLogMessage << "--------------------------- "<<std::endl;
for (int level = 0; level < as.size(); ++level) {
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
std::cout << GridLogMessage
<< as[level].actions.at(actionID)->action_name()
<<"["<<level<<"]["<< actionID<<"] "
<< as[level].actions.at(actionID)->refresh_us*1.0e-6<<" s"<< std::endl;
}
}
std::cout << GridLogMessage << "--------------------------- "<<std::endl;
std::cout << GridLogMessage << " Action cumulative timings "<<std::endl;
std::cout << GridLogMessage << "--------------------------- "<<std::endl;
for (int level = 0; level < as.size(); ++level) {
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
std::cout << GridLogMessage
<< as[level].actions.at(actionID)->action_name()
<<"["<<level<<"]["<< actionID<<"] "
<< as[level].actions.at(actionID)->S_us*1.0e-6<<" s"<< std::endl;
}
}
std::cout << GridLogMessage << "--------------------------- "<<std::endl;
std::cout << GridLogMessage << " Force cumulative timings "<<std::endl;
std::cout << GridLogMessage << "------------------------- "<<std::endl;
for (int level = 0; level < as.size(); ++level) {
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
std::cout << GridLogMessage
<< as[level].actions.at(actionID)->action_name()
<<"["<<level<<"]["<< actionID<<"] "
<< as[level].actions.at(actionID)->deriv_us*1.0e-6<<" s"<< std::endl;
}
}
std::cout << GridLogMessage << "--------------------------- "<<std::endl;
std::cout << GridLogMessage << " Force average size "<<std::endl;
std::cout << GridLogMessage << "------------------------- "<<std::endl;
for (int level = 0; level < as.size(); ++level) {
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
std::cout << GridLogMessage
<< as[level].actions.at(actionID)->action_name()
<<"["<<level<<"]["<< actionID<<"] : "
<<" force max " << as[level].actions.at(actionID)->deriv_max_average()
<<" norm " << as[level].actions.at(actionID)->deriv_norm_average()
<<" calls " << as[level].actions.at(actionID)->deriv_num
<< std::endl;
}
}
std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::"<< std::endl;
}
void print_parameters()
{
std::cout << GridLogMessage << "[Integrator] Name : "<< integrator_name() << std::endl;
@ -224,7 +310,6 @@ public:
}
}
std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::"<< std::endl;
}
void reverse_momenta()
@ -267,15 +352,19 @@ public:
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
// get gauge field from the SmearingPolicy and
// based on the boolean is_smeared in actionID
auto name = as[level].actions.at(actionID)->action_name();
std::cout << GridLogMessage << "refresh [" << level << "][" << actionID << "] "<<name << std::endl;
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
as[level].actions.at(actionID)->refresh_timer_start();
as[level].actions.at(actionID)->refresh(Us, sRNG, pRNG);
as[level].actions.at(actionID)->refresh_timer_stop();
}
// Refresh the higher representation actions
as[level].apply(refresh_hireps, Representations, sRNG, pRNG);
}
MomFilter->applyFilter(P);
}
// to be used by the actionlevel class to iterate
@ -310,7 +399,9 @@ public:
// based on the boolean is_smeared in actionID
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
as[level].actions.at(actionID)->S_timer_start();
Hterm = as[level].actions.at(actionID)->S(Us);
as[level].actions.at(actionID)->S_timer_stop();
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
H += Hterm;
}

View File

@ -131,8 +131,11 @@ class CartesianStencilAccelerator {
int _checkerboard;
int _npoints; // Move to template param?
int _osites;
int _dirichlet;
StencilVector _directions;
StencilVector _distances;
StencilVector _comms_send;
StencilVector _comms_recv;
StencilVector _comm_buf_size;
StencilVector _permute_type;
StencilVector same_node;
@ -226,6 +229,8 @@ public:
void * recv_buf;
Integer to_rank;
Integer from_rank;
Integer do_send;
Integer do_recv;
Integer bytes;
};
struct Merge {
@ -240,7 +245,20 @@ public:
cobj * mpi_p;
Integer buffer_size;
};
struct CopyReceiveBuffer {
void * from_p;
void * to_p;
Integer bytes;
};
struct CachedTransfer {
Integer direction;
Integer OrthogPlane;
Integer DestProc;
Integer bytes;
Integer lane;
Integer cb;
void *recv_buf;
};
protected:
GridBase * _grid;
@ -271,7 +289,8 @@ public:
std::vector<Merge> MergersSHM;
std::vector<Decompress> Decompressions;
std::vector<Decompress> DecompressionsSHM;
std::vector<CopyReceiveBuffer> CopyReceiveBuffers ;
std::vector<CachedTransfer> CachedTransfers;
///////////////////////////////////////////////////////////
// Unified Comms buffers for all directions
///////////////////////////////////////////////////////////
@ -284,29 +303,6 @@ public:
int u_comm_offset;
int _unified_buffer_size;
/////////////////////////////////////////
// Timing info; ugly; possibly temporary
/////////////////////////////////////////
double commtime;
double mpi3synctime;
double mpi3synctime_g;
double shmmergetime;
double gathertime;
double gathermtime;
double halogtime;
double mergetime;
double decompresstime;
double comms_bytes;
double shm_bytes;
double splicetime;
double nosplicetime;
double calls;
std::vector<double> comm_bytes_thr;
std::vector<double> shm_bytes_thr;
std::vector<double> comm_time_thr;
std::vector<double> comm_enter_thr;
std::vector<double> comm_leave_thr;
////////////////////////////////////////
// Stencil query
////////////////////////////////////////
@ -333,11 +329,12 @@ public:
//////////////////////////////////////////
// Comms packet queue for asynch thread
// Use OpenMP Tasks for cleaner ???
// must be called *inside* parallel region
//////////////////////////////////////////
/*
void CommunicateThreaded()
{
#ifdef GRID_OMP
// must be called in parallel region
int mythread = omp_get_thread_num();
int nthreads = CartesianCommunicator::nCommThreads;
#else
@ -346,65 +343,29 @@ public:
#endif
if (nthreads == -1) nthreads = 1;
if (mythread < nthreads) {
comm_enter_thr[mythread] = usecond();
for (int i = mythread; i < Packets.size(); i += nthreads) {
uint64_t bytes = _grid->StencilSendToRecvFrom(Packets[i].send_buf,
Packets[i].to_rank,
Packets[i].recv_buf,
Packets[i].from_rank,
Packets[i].bytes,i);
comm_bytes_thr[mythread] += bytes;
shm_bytes_thr[mythread] += 2*Packets[i].bytes-bytes; // Send + Recv.
}
comm_leave_thr[mythread]= usecond();
comm_time_thr[mythread] += comm_leave_thr[mythread] - comm_enter_thr[mythread];
}
}
void CollateThreads(void)
{
int nthreads = CartesianCommunicator::nCommThreads;
double first=0.0;
double last =0.0;
for(int t=0;t<nthreads;t++) {
double t0 = comm_enter_thr[t];
double t1 = comm_leave_thr[t];
comms_bytes+=comm_bytes_thr[t];
shm_bytes +=shm_bytes_thr[t];
comm_enter_thr[t] = 0.0;
comm_leave_thr[t] = 0.0;
comm_time_thr[t] = 0.0;
comm_bytes_thr[t]=0;
shm_bytes_thr[t]=0;
if ( first == 0.0 ) first = t0; // first is t0
if ( (t0 > 0.0) && ( t0 < first ) ) first = t0; // min time seen
if ( t1 > last ) last = t1; // max time seen
}
commtime+= last-first;
}
*/
////////////////////////////////////////////////////////////////////////
// Non blocking send and receive. Necessarily parallel.
////////////////////////////////////////////////////////////////////////
void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs)
{
reqs.resize(Packets.size());
commtime-=usecond();
for(int i=0;i<Packets.size();i++){
uint64_t bytes=_grid->StencilSendToRecvFromBegin(reqs[i],
_grid->StencilSendToRecvFromBegin(reqs[i],
Packets[i].send_buf,
Packets[i].to_rank,
Packets[i].to_rank,Packets[i].do_send,
Packets[i].recv_buf,
Packets[i].from_rank,
Packets[i].from_rank,Packets[i].do_recv,
Packets[i].bytes,i);
comms_bytes+=bytes;
shm_bytes +=2*Packets[i].bytes-bytes;
}
}
@ -413,7 +374,6 @@ public:
for(int i=0;i<Packets.size();i++){
_grid->StencilSendToRecvFromComplete(reqs[i],i);
}
commtime+=usecond();
}
////////////////////////////////////////////////////////////////////////
// Blocking send and receive. Either sequential or parallel.
@ -421,28 +381,27 @@ public:
void Communicate(void)
{
if ( CartesianCommunicator::CommunicatorPolicy == CartesianCommunicator::CommunicatorPolicySequential ){
thread_region {
// must be called in parallel region
int mythread = thread_num();
int maxthreads= thread_max();
/////////////////////////////////////////////////////////
// Several-way threaded on different communicators.
// Cannot be combined with Dirichlet operators.
// This scheme is needed on Intel Omni-Path for best performance;
// deprecate once few Omni-Path clusters remain.
/////////////////////////////////////////////////////////
int nthreads = CartesianCommunicator::nCommThreads;
assert(nthreads <= maxthreads);
if (nthreads == -1) nthreads = 1;
if (mythread < nthreads) {
for (int i = mythread; i < Packets.size(); i += nthreads) {
double start = usecond();
uint64_t bytes= _grid->StencilSendToRecvFrom(Packets[i].send_buf,
Packets[i].to_rank,
int old = GridThread::GetThreads();
GridThread::SetThreads(nthreads);
thread_for(i,Packets.size(),{
_grid->StencilSendToRecvFrom(Packets[i].send_buf,
Packets[i].to_rank,Packets[i].do_send,
Packets[i].recv_buf,
Packets[i].from_rank,
Packets[i].from_rank,Packets[i].do_recv,
Packets[i].bytes,i);
comm_bytes_thr[mythread] += bytes;
shm_bytes_thr[mythread] += Packets[i].bytes - bytes;
comm_time_thr[mythread] += usecond() - start;
}
}
}
} else { // Concurrent and non-threaded asynch calls to MPI
});
GridThread::SetThreads(old);
} else {
/////////////////////////////////////////////////////////
// Concurrent and non-threaded asynch calls to MPI
/////////////////////////////////////////////////////////
std::vector<std::vector<CommsRequest_t> > reqs;
this->CommunicateBegin(reqs);
this->CommunicateComplete(reqs);
@ -484,31 +443,23 @@ public:
sshift[1] = _grid->CheckerBoardShiftForCB(this->_checkerboard,dimension,shift,Odd);
if ( sshift[0] == sshift[1] ) {
if (splice_dim) {
splicetime-=usecond();
auto tmp = GatherSimd(source,dimension,shift,0x3,compress,face_idx);
auto tmp = GatherSimd(source,dimension,shift,0x3,compress,face_idx,point);
is_same_node = is_same_node && tmp;
splicetime+=usecond();
} else {
nosplicetime-=usecond();
auto tmp = Gather(source,dimension,shift,0x3,compress,face_idx);
auto tmp = Gather(source,dimension,shift,0x3,compress,face_idx,point);
is_same_node = is_same_node && tmp;
nosplicetime+=usecond();
}
} else {
if(splice_dim){
splicetime-=usecond();
// if checkerboard is unfavourable take two passes
// both with block stride loop iteration
auto tmp1 = GatherSimd(source,dimension,shift,0x1,compress,face_idx);
auto tmp2 = GatherSimd(source,dimension,shift,0x2,compress,face_idx);
auto tmp1 = GatherSimd(source,dimension,shift,0x1,compress,face_idx,point);
auto tmp2 = GatherSimd(source,dimension,shift,0x2,compress,face_idx,point);
is_same_node = is_same_node && tmp1 && tmp2;
splicetime+=usecond();
} else {
nosplicetime-=usecond();
auto tmp1 = Gather(source,dimension,shift,0x1,compress,face_idx);
auto tmp2 = Gather(source,dimension,shift,0x2,compress,face_idx);
auto tmp1 = Gather(source,dimension,shift,0x1,compress,face_idx,point);
auto tmp2 = Gather(source,dimension,shift,0x2,compress,face_idx,point);
is_same_node = is_same_node && tmp1 && tmp2;
nosplicetime+=usecond();
}
}
}
@ -518,13 +469,10 @@ public:
template<class compressor>
void HaloGather(const Lattice<vobj> &source,compressor &compress)
{
mpi3synctime_g-=usecond();
_grid->StencilBarrier();// Synch shared memory within a single node
mpi3synctime_g+=usecond();
// conformable(source.Grid(),_grid);
assert(source.Grid()==_grid);
halogtime-=usecond();
u_comm_offset=0;
@ -538,7 +486,6 @@ public:
assert(u_comm_offset==_unified_buffer_size);
accelerator_barrier();
halogtime+=usecond();
}
/////////////////////////
@ -551,14 +498,72 @@ public:
Mergers.resize(0);
MergersSHM.resize(0);
Packets.resize(0);
calls++;
CopyReceiveBuffers.resize(0);
CachedTransfers.resize(0);
}
void AddPacket(void *xmit,void * rcv, Integer to,Integer from,Integer bytes){
void AddCopy(void *from,void * to, Integer bytes)
{
// std::cout << "Adding CopyReceiveBuffer "<<std::hex<<from<<" "<<to<<std::dec<<" "<<bytes<<std::endl;
CopyReceiveBuffer obj;
obj.from_p = from;
obj.to_p = to;
obj.bytes= bytes;
CopyReceiveBuffers.push_back(obj);
}
void CommsCopy()
{
// These are device resident MPI buffers.
for(int i=0;i<CopyReceiveBuffers.size();i++){
cobj *from=(cobj *)CopyReceiveBuffers[i].from_p;
cobj *to =(cobj *)CopyReceiveBuffers[i].to_p;
Integer words = CopyReceiveBuffers[i].bytes/sizeof(cobj);
// std::cout << "CopyReceiveBuffer "<<std::hex<<from<<" "<<to<<std::dec<<" "<<words*sizeof(cobj)<<std::endl;
accelerator_forNB(j, words, cobj::Nsimd(), {
coalescedWrite(to[j] ,coalescedRead(from [j]));
});
}
}
Integer CheckForDuplicate(Integer direction, Integer OrthogPlane, Integer DestProc, void *recv_buf,Integer lane,Integer bytes,Integer cb)
{
CachedTransfer obj;
obj.direction = direction;
obj.OrthogPlane = OrthogPlane;
obj.DestProc = DestProc;
obj.recv_buf = recv_buf;
obj.lane = lane;
obj.bytes = bytes;
obj.cb = cb;
for(int i=0;i<CachedTransfers.size();i++){
if ( (CachedTransfers[i].direction ==direction)
&&(CachedTransfers[i].OrthogPlane==OrthogPlane)
&&(CachedTransfers[i].DestProc ==DestProc)
&&(CachedTransfers[i].bytes ==bytes)
&&(CachedTransfers[i].lane ==lane)
&&(CachedTransfers[i].cb ==cb)
){
// std::cout << "Found duplicate plane dir "<<direction<<" plane "<< OrthogPlane<< " simd "<<lane << " relproc "<<DestProc<< " bytes "<<bytes <<std::endl;
AddCopy(CachedTransfers[i].recv_buf,recv_buf,bytes);
return 1;
}
}
// std::cout << "No duplicate plane dir "<<direction<<" plane "<< OrthogPlane<< " simd "<<lane << " relproc "<<DestProc<<" bytes "<<bytes<<std::endl;
CachedTransfers.push_back(obj);
return 0;
}
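// Intended effect (sketch): two stencil legs requesting the same plane from the same
// neighbour (identical direction/plane/proc/lane/cb/bytes) generate only one MPI packet.
//   first  caller: CheckForDuplicate(...) returns 0 -> caller proceeds to AddPacket(...)
//   second caller: CheckForDuplicate(...) returns 1 -> an AddCopy(cached_buf, my_buf, bytes)
//                  is queued instead of a second transfer
// CommsCopy(), invoked from CommsMerge() below, then replays the queued device-side copies.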
void AddPacket(void *xmit,void * rcv,
Integer to, Integer do_send,
Integer from, Integer do_recv,
Integer bytes){
Packet p;
p.send_buf = xmit;
p.recv_buf = rcv;
p.to_rank = to;
p.from_rank= from;
p.do_send = do_send;
p.do_recv = do_recv;
p.bytes = bytes;
Packets.push_back(p);
}
@ -578,22 +583,17 @@ public:
mv.push_back(m);
}
template<class decompressor> void CommsMerge(decompressor decompress) {
CommsCopy();
CommsMerge(decompress,Mergers,Decompressions);
}
template<class decompressor> void CommsMergeSHM(decompressor decompress) {
mpi3synctime-=usecond();
_grid->StencilBarrier();// Synch shared memory within a single node
mpi3synctime+=usecond();
shmmergetime-=usecond();
CommsMerge(decompress,MergersSHM,DecompressionsSHM);
shmmergetime+=usecond();
}
template<class decompressor>
void CommsMerge(decompressor decompress,std::vector<Merge> &mm,std::vector<Decompress> &dd) {
mergetime-=usecond();
void CommsMerge(decompressor decompress,std::vector<Merge> &mm,std::vector<Decompress> &dd)
{
for(int i=0;i<mm.size();i++){
auto mp = &mm[i].mpointer[0];
auto vp0= &mm[i].vpointers[0][0];
@ -603,9 +603,7 @@ public:
decompress.Exchange(mp,vp0,vp1,type,o);
});
}
mergetime+=usecond();
decompresstime-=usecond();
for(int i=0;i<dd.size();i++){
auto kp = dd[i].kernel_p;
auto mp = dd[i].mpi_p;
@ -613,7 +611,6 @@ public:
decompress.Decompress(kp,mp,o);
});
}
decompresstime+=usecond();
}
////////////////////////////////////////
// Set up routines
@ -650,19 +647,58 @@ public:
}
}
}
/// Introduce a block structure and switch off comms on boundaries
void DirichletBlock(const Coordinate &dirichlet_block)
{
this->_dirichlet = 1;
for(int ii=0;ii<this->_npoints;ii++){
int dimension = this->_directions[ii];
int displacement = this->_distances[ii];
int shift = displacement;
int gd = _grid->_gdimensions[dimension];
int fd = _grid->_fdimensions[dimension];
int pd = _grid->_processors [dimension];
int ld = gd/pd;
int pc = _grid->_processor_coor[dimension];
///////////////////////////////////////////
// Figure out dirichlet send and receive
// on this leg of stencil.
///////////////////////////////////////////
int comm_dim = _grid->_processors[dimension] >1 ;
int block = dirichlet_block[dimension];
this->_comms_send[ii] = comm_dim;
this->_comms_recv[ii] = comm_dim;
if ( block ) {
assert(abs(displacement) < ld );
if( displacement > 0 ) {
// High side, low side
// | <--B--->|
// | | |
// noR
// noS
if ( (ld*(pc+1) ) % block == 0 ) this->_comms_recv[ii] = 0;
if ( ( ld*pc ) % block == 0 ) this->_comms_send[ii] = 0;
} else {
// High side, low side
// | <--B--->|
// | | |
// noS
// noR
if ( (ld*(pc+1) ) % block == 0 ) this->_comms_send[ii] = 0;
if ( ( ld*pc ) % block == 0 ) this->_comms_recv[ii] = 0;
}
}
}
}
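// Worked example (illustrative numbers): gd=32, pd=2 so ld=16 sites per rank;
// dirichlet_block[dim]=16, displacement=+1.
//   rank pc=0: (ld*(pc+1))%block = 16%16 = 0  -> _comms_recv switched off
//              (ld* pc   )%block =  0%16 = 0  -> _comms_send switched off
//   rank pc=1: both conditions hold again      -> comms off on this leg everywhere
// i.e. when the block coincides with the rank-local (or node-local) slab, the halo
// exchange on that leg is disabled entirely; block=0 leaves full comms in place.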
CartesianStencil(GridBase *grid,
int npoints,
int checkerboard,
const std::vector<int> &directions,
const std::vector<int> &distances,
Parameters p)
: shm_bytes_thr(npoints),
comm_bytes_thr(npoints),
comm_enter_thr(npoints),
comm_leave_thr(npoints),
comm_time_thr(npoints)
{
this->_dirichlet = 0;
face_table_computed=0;
_grid = grid;
this->parameters=p;
@ -675,6 +711,8 @@ public:
this->_simd_layout = _grid->_simd_layout; // copy simd_layout to give access to Accelerator Kernels
this->_directions = StencilVector(directions);
this->_distances = StencilVector(distances);
this->_comms_send.resize(npoints);
this->_comms_recv.resize(npoints);
this->same_node.resize(npoints);
_unified_buffer_size=0;
@ -693,24 +731,27 @@ public:
int displacement = distances[i];
int shift = displacement;
int gd = _grid->_gdimensions[dimension];
int fd = _grid->_fdimensions[dimension];
int pd = _grid->_processors [dimension];
int ld = gd/pd;
int rd = _grid->_rdimensions[dimension];
int pc = _grid->_processor_coor[dimension];
this->_permute_type[point]=_grid->PermuteType(dimension);
this->_checkerboard = checkerboard;
//////////////////////////
// the permute type
//////////////////////////
int simd_layout = _grid->_simd_layout[dimension];
int comm_dim = _grid->_processors[dimension] >1 ;
int splice_dim = _grid->_simd_layout[dimension]>1 && (comm_dim);
int rotate_dim = _grid->_simd_layout[dimension]>2;
this->_comms_send[ii] = comm_dim;
this->_comms_recv[ii] = comm_dim;
assert ( (rotate_dim && comm_dim) == false) ; // Do not think spread out is supported
int sshift[2];
//////////////////////////
// Underlying approach. For each local site build
// up a table containing the npoint "neighbours" and whether they
@ -811,6 +852,7 @@ public:
GridBase *grid=_grid;
const int Nsimd = grid->Nsimd();
int comms_recv = this->_comms_recv[point];
int fd = _grid->_fdimensions[dimension];
int ld = _grid->_ldimensions[dimension];
int rd = _grid->_rdimensions[dimension];
@ -867,7 +909,9 @@ public:
if ( (shiftpm== 1) && (sx<x) && (grid->_processor_coor[dimension]==grid->_processors[dimension]-1) ) {
wraparound = 1;
}
if (!offnode) {
// Wrap locally: Dirichlet support case OR node-local
if ( (offnode==0) || (comms_recv==0) ) {
int permute_slice=0;
CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
@ -984,11 +1028,14 @@ public:
}
template<class compressor>
int Gather(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor & compress,int &face_idx)
int Gather(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor & compress,int &face_idx, int point)
{
typedef typename cobj::vector_type vector_type;
typedef typename cobj::scalar_type scalar_type;
int comms_send = this->_comms_send[point] ;
int comms_recv = this->_comms_recv[point] ;
assert(rhs.Grid()==_grid);
// conformable(_grid,rhs.Grid());
@ -1014,6 +1061,8 @@ public:
if (comm_proc) {
int words = buffer_size;
if (cbmask != 0x3) words=words>>1;
@ -1045,16 +1094,20 @@ public:
recv_buf=this->u_recv_buf_p;
}
cobj *send_buf;
send_buf = this->u_send_buf_p; // Gather locally, must send
////////////////////////////////////////////////////////
// Gather locally
////////////////////////////////////////////////////////
gathertime-=usecond();
assert(send_buf!=NULL);
Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,u_comm_offset,so); face_idx++;
gathertime+=usecond();
if ( comms_send )
Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,u_comm_offset,so);
face_idx++;
int duplicate = CheckForDuplicate(dimension,sx,comm_proc,(void *)&recv_buf[u_comm_offset],0,bytes,cbmask);
if ( (!duplicate) ) { // Force comms for now
///////////////////////////////////////////////////////////
// Build a list of things to do after we synchronise GPUs
@ -1062,9 +1115,10 @@ public:
///////////////////////////////////////////////////////////
AddPacket((void *)&send_buf[u_comm_offset],
(void *)&recv_buf[u_comm_offset],
xmit_to_rank,
recv_from_rank,
xmit_to_rank, comms_send,
recv_from_rank, comms_recv,
bytes);
}
if ( compress.DecompressionStep() ) {
AddDecompress(&this->u_recv_buf_p[u_comm_offset],
@ -1078,11 +1132,15 @@ public:
}
template<class compressor>
int GatherSimd(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor &compress,int & face_idx)
int GatherSimd(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor &compress,int & face_idx,int point)
{
const int Nsimd = _grid->Nsimd();
const int maxl =2;// max layout in a direction
int comms_send = this->_comms_send[point] ;
int comms_recv = this->_comms_recv[point] ;
int fd = _grid->_fdimensions[dimension];
int rd = _grid->_rdimensions[dimension];
int ld = _grid->_ldimensions[dimension];
@ -1147,12 +1205,11 @@ public:
&face_table[face_idx][0],
face_table[face_idx].size()*sizeof(face_table_host[0]));
}
gathermtime-=usecond();
// if ( comms_send )
Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type);
face_idx++;
gathermtime+=usecond();
//spointers[0] -- low
//spointers[1] -- high
@ -1181,8 +1238,13 @@ public:
rpointers[i] = rp;
AddPacket((void *)sp,(void *)rp,xmit_to_rank,recv_from_rank,bytes);
int duplicate = CheckForDuplicate(dimension,sx,nbr_proc,(void *)rp,i,bytes,cbmask);
if ( !duplicate ) {
AddPacket((void *)sp,(void *)rp,
xmit_to_rank,comms_send,
recv_from_rank,comms_recv,
bytes);
}
} else {

View File

@ -55,7 +55,7 @@ template<class vtype, int N> accelerator_inline iVector<vtype, N> Exponentiate(c
// Specialisation: Cayley-Hamilton exponential for SU(3)
#ifndef GRID_CUDA
#ifndef GRID_ACCELERATED
template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr>
accelerator_inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP )
{

View File

@ -441,7 +441,7 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(bas
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
{
hipMemcpyAsync(to,from,bytes, hipMemcpyDeviceToDevice,copyStream);
hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);
}
inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); };
@ -461,6 +461,8 @@ inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream);
accelerator_for2dNB(iter1, num1, iter2, num2, nsimd, { __VA_ARGS__ } ); \
accelerator_barrier(dummy);
#define GRID_ACCELERATED
#endif
//////////////////////////////////////////////
@ -481,9 +483,10 @@ inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream);
#define accelerator_for2d(iter1, num1, iter2, num2, nsimd, ... ) thread_for2d(iter1,num1,iter2,num2,{ __VA_ARGS__ });
accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { memcpy(to,from,bytes);}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ memcpy(to,from,bytes);}
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { memcpy(to,from,bytes);}
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes); }
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ thread_bcopy(from,to,bytes);}
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes);}
inline void acceleratorCopySynchronise(void) {};
inline int acceleratorIsCommunicable(void *ptr){ return 1; }

View File

@ -72,3 +72,20 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define thread_region DO_PRAGMA(omp parallel)
#define thread_critical DO_PRAGMA(omp critical)
#ifdef GRID_OMP
inline void thread_bcopy(void *from, void *to,size_t bytes)
{
uint64_t *ufrom = (uint64_t *)from;
uint64_t *uto = (uint64_t *)to;
assert(bytes%8==0);
uint64_t words=bytes/8;
thread_for(w,words,{
uto[w] = ufrom[w];
});
}
#else
inline void thread_bcopy(void *from, void *to,size_t bytes)
{
bcopy(from,to,bytes);
}
#endif
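// Usage note (sketch): the host-side accelerator copy shims earlier in this diff now
// route copies through thread_bcopy, which splits the transfer into 64-bit words across
// the OpenMP team; callers must therefore pass sizes that are a multiple of 8 bytes, e.g.
//   std::vector<double> a(1024), b(1024);
//   thread_bcopy(a.data(), b.data(), a.size()*sizeof(double)); // 8192 bytes, OK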

HMC/Mobius2p1f_DD_RHMC.cc (new file, 265 lines)
View File

@ -0,0 +1,265 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_hmc_EODWFRatio.cc
Copyright (C) 2015-2016
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
int main(int argc, char **argv) {
using namespace Grid;
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
// Typedefs to simplify notation
typedef WilsonImplR FermionImplPolicy;
typedef MobiusFermionR FermionAction;
typedef typename FermionAction::FermionField FermionField;
typedef Grid::XmlReader Serialiser;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
IntegratorParameters MD;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// MD.name = std::string("Leap Frog");
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
// MD.name = std::string("Force Gradient");
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
MD.name = std::string("MinimumNorm2");
MD.MDsteps = 4;
MD.trajL = 1.0;
HMCparameters HMCparams;
HMCparams.StartTrajectory = 17;
HMCparams.Trajectories = 200;
HMCparams.NoMetropolisUntil= 0;
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
// HMCparams.StartingType =std::string("ColdStart");
HMCparams.StartingType =std::string("CheckpointStart");
HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams);
// Grid from the command line arguments --grid and --mpi
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_DDHMC_lat";
CPparams.rng_prefix = "ckpoint_DDHMC_rng";
CPparams.saveInterval = 1;
CPparams.format = "IEEE64BIG";
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
RNGModuleParameters RNGpar;
RNGpar.serial_seeds = "1 2 3 4 5";
RNGpar.parallel_seeds = "6 7 8 9 10";
TheHMC.Resources.SetRNGSeeds(RNGpar);
// Construct observables
// here there is too much indirection
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
TheHMC.Resources.AddObservable<PlaqObs>();
//////////////////////////////////////////////
const int Ls = 16;
RealD M5 = 1.8;
RealD b = 1.0;
RealD c = 0.0;
Real beta = 2.13;
Real light_mass = 0.01;
Real strange_mass = 0.04;
Real pv_mass = 1.0;
std::vector<Real> hasenbusch({ light_mass, 0.04, 0.25, 0.4, 0.7 , pv_mass });
// FIXME:
// Same in MC and MD
// Need to mix precision too
OneFlavourRationalParams SFRp;
SFRp.lo = 4.0e-3;
SFRp.hi = 30.0;
SFRp.MaxIter = 10000;
SFRp.tolerance= 1.0e-8;
SFRp.mdtolerance= 1.0e-6;
SFRp.degree = 16;
SFRp.precision= 50;
SFRp.BoundsCheckFreq=5;
OneFlavourRationalParams OFRp;
OFRp.lo = 1.0e-4;
OFRp.hi = 30.0;
OFRp.MaxIter = 10000;
OFRp.tolerance= 1.0e-8;
OFRp.mdtolerance= 1.0e-6;
OFRp.degree = 16;
OFRp.precision= 50;
OFRp.BoundsCheckFreq=5;
auto GridPtr = TheHMC.Resources.GetCartesian();
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
////////////////////////////////////////////////////////////////
// Domain decomposed
////////////////////////////////////////////////////////////////
Coordinate latt4 = GridPtr->GlobalDimensions();
Coordinate mpi = GridPtr->ProcessorGrid();
Coordinate shm;
GlobalSharedMemory::GetShmDims(mpi,shm);
Coordinate CommDim(Nd);
for(int d=0;d<Nd;d++) CommDim[d]= (mpi[d]/shm[d])>1 ? 1 : 0;
Coordinate Dirichlet(Nd+1,0);
Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0];
Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1];
Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2];
Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3];
Coordinate Block4(Nd);
Block4[0] = Dirichlet[1];
Block4[1] = Dirichlet[2];
Block4[2] = Dirichlet[3];
Block4[3] = Dirichlet[4];
int Width=3;
TheHMC.Resources.SetMomentumFilter(new DDHMCFilter<WilsonImplR::Field>(Block4,Width));
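// Worked example (illustrative numbers): latt4 = 32.32.32.64, mpi = 2.2.2.2, shm = 2.2.2.1
//   => CommDim   = {0,0,0,1}      only t crosses a node boundary for this shm layout
//   => Dirichlet = {0,0,0,0,32}   block = node-local extent in t, 0 (no block) elsewhere
// so inter-node halo exchange in t is suppressed while intra-node directions keep full
// comms; Block4 and Width then make the momentum filter respect the same boundaries.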
//////////////////////////
// Fermion Grid
//////////////////////////
auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
IwasakiGaugeActionR GaugeAction(beta);
// temporarily need a gauge field
LatticeGaugeField U(GridPtr);
// These lines are unnecessary if the BCs are all periodic
std::vector<Complex> boundary = {1,1,1,-1};
FermionAction::ImplParams Params(boundary);
double StoppingCondition = 1e-8;
double MaxCGIterations = 30000;
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
////////////////////////////////////
// Collect actions
////////////////////////////////////
ActionLevel<HMCWrapper::Field> Level1(1);
ActionLevel<HMCWrapper::Field> Level2(4);
ActionLevel<HMCWrapper::Field> Level3(6);
////////////////////////////////////
// Strange action
////////////////////////////////////
FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params);
FermionAction StrangeOpDir (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
FermionAction StrangePauliVillarsOpDir(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params);
StrangeOpDir.DirichletBlock(Dirichlet);
StrangePauliVillarsOpDir.DirichletBlock(Dirichlet);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionBdy(StrangeOpDir,StrangeOp,SFRp);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionLocal(StrangePauliVillarsOpDir,StrangeOpDir,SFRp);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionPVBdy(StrangePauliVillarsOp,StrangePauliVillarsOpDir,SFRp);
Level1.push_back(&StrangePseudoFermionBdy);
Level2.push_back(&StrangePseudoFermionLocal);
Level1.push_back(&StrangePseudoFermionPVBdy);
////////////////////////////////////
// up down action
////////////////////////////////////
std::vector<Real> light_den;
std::vector<Real> light_num;
std::vector<int> dirichlet_den;
std::vector<int> dirichlet_num;
int n_hasenbusch = hasenbusch.size();
light_den.push_back(light_mass); dirichlet_den.push_back(0);
for(int h=0;h<n_hasenbusch;h++){
light_den.push_back(hasenbusch[h]); dirichlet_den.push_back(1);
}
for(int h=0;h<n_hasenbusch;h++){
light_num.push_back(hasenbusch[h]); dirichlet_num.push_back(1);
}
light_num.push_back(pv_mass); dirichlet_num.push_back(0);
std::vector<FermionAction *> Numerators;
std::vector<FermionAction *> Denominators;
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
std::vector<OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
for(int h=0;h<n_hasenbusch+1;h++){
std::cout << GridLogMessage
<< " 2f quotient Action ";
std::cout << "det D("<<light_den[h]<<")";
if ( dirichlet_den[h] ) std::cout << "^dirichlet ";
std::cout << "/ det D("<<light_num[h]<<")";
if ( dirichlet_num[h] ) std::cout << "^dirichlet ";
std::cout << std::endl;
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
if(h!=0) {
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],CG,CG));
} else {
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
}
if ( dirichlet_den[h]==1) Denominators[h]->DirichletBlock(Dirichlet);
if ( dirichlet_num[h]==1) Numerators[h]->DirichletBlock(Dirichlet);
}
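// Trace of the chain the loop above builds (hasenbusch = {0.01,0.04,0.25,0.4,0.7,1.0}):
//   h=0   : det D(0.01) / det D(0.01)^dirichlet            -> two one-flavour rational Bdys terms
//   h=1..5: det D(0.01)^dir/det D(0.04)^dir, det D(0.04)^dir/det D(0.25)^dir, ... ,
//           det D(0.7)^dir/det D(1.0)^dir                   -> local two-flavour Quotients
//   h=6   : det D(1.0)^dirichlet / det D(1.0)               -> PV boundary Quotient
// Bdys and the final PV Quotient go on the coarse Level1 below; the local Quotients on Level2.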
int nquo=Quotients.size();
Level1.push_back(Bdys[0]);
Level1.push_back(Bdys[1]);
for(int h=0;h<nquo-1;h++){
Level2.push_back(Quotients[h]);
}
Level1.push_back(Quotients[nquo-1]); // PV dirichlet fix on coarse timestep
/////////////////////////////////////////////////////////////
// Gauge action
/////////////////////////////////////////////////////////////
Level3.push_back(&GaugeAction);
TheHMC.TheAction.push_back(Level1);
TheHMC.TheAction.push_back(Level2);
TheHMC.TheAction.push_back(Level3);
std::cout << GridLogMessage << " Action complete "<< std::endl;
/////////////////////////////////////////////////////////////
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
TheHMC.Run(); // no smearing
Grid_finalize();
} // main

View File

@ -217,9 +217,9 @@ int main (int argc, char ** argv)
dbytes+=
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu][0],
xmit_to_rank,
xmit_to_rank,1,
(void *)&rbuf[mu][0],
recv_from_rank,
recv_from_rank,1,
bytes,mu);
comm_proc = mpi_layout[mu]-1;
@ -228,9 +228,9 @@ int main (int argc, char ** argv)
dbytes+=
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu+4][0],
xmit_to_rank,
xmit_to_rank,1,
(void *)&rbuf[mu+4][0],
recv_from_rank,
recv_from_rank,1,
bytes,mu+4);
}
@ -309,9 +309,9 @@ int main (int argc, char ** argv)
dbytes+=
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu][0],
xmit_to_rank,
xmit_to_rank,1,
(void *)&rbuf[mu][0],
recv_from_rank,
recv_from_rank,1,
bytes,mu);
Grid.StencilSendToRecvFromComplete(requests,mu);
requests.resize(0);
@ -322,9 +322,9 @@ int main (int argc, char ** argv)
dbytes+=
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu+4][0],
xmit_to_rank,
xmit_to_rank,1,
(void *)&rbuf[mu+4][0],
recv_from_rank,
recv_from_rank,1,
bytes,mu+4);
Grid.StencilSendToRecvFromComplete(requests,mu+4);
requests.resize(0);
@ -411,8 +411,8 @@ int main (int argc, char ** argv)
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
}
int tid = omp_get_thread_num();
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
(void *)&rbuf[dir][0], recv_from_rank, bytes,tid);
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,1,
(void *)&rbuf[dir][0], recv_from_rank,1, bytes,tid);
thread_critical { dbytes+=tbytes; }
}

View File

@ -32,18 +32,18 @@
using namespace std;
using namespace Grid;
template<class d>
struct scal {
d internal;
};
////////////////////////
/// Move to domains ////
////////////////////////
Gamma::Algebra Gmu [] = {
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT
};
};
void Benchmark(int Ls, Coordinate Dirichlet);
int main (int argc, char ** argv)
{
@ -52,24 +52,82 @@ int main (int argc, char ** argv)
int threads = GridThread::GetThreads();
Coordinate latt4 = GridDefaultLatt();
int Ls=16;
for(int i=0;i<argc;i++)
for(int i=0;i<argc;i++) {
if(std::string(argv[i]) == "-Ls"){
std::stringstream ss(argv[i+1]); ss >> Ls;
}
}
//////////////////
// With comms
//////////////////
Coordinate Dirichlet(Nd+1,0);
std::cout << "\n\n\n\n\n\n" <<std::endl;
std::cout << GridLogMessage<< "++++++++++++++++++++++++++++++++++++++++++++++++" <<std::endl;
std::cout << GridLogMessage<< " Testing with full communication " <<std::endl;
std::cout << GridLogMessage<< "++++++++++++++++++++++++++++++++++++++++++++++++" <<std::endl;
Benchmark(Ls,Dirichlet);
//////////////////
// Domain decomposed
//////////////////
Coordinate latt4 = GridDefaultLatt();
Coordinate mpi = GridDefaultMpi();
Coordinate CommDim(Nd);
Coordinate shm;
GlobalSharedMemory::GetShmDims(mpi,shm);
//////////////////////
// Node level
//////////////////////
std::cout << "\n\n\n\n\n\n" <<std::endl;
std::cout << GridLogMessage<< "++++++++++++++++++++++++++++++++++++++++++++++++" <<std::endl;
std::cout << GridLogMessage<< " Testing without internode communication " <<std::endl;
std::cout << GridLogMessage<< "++++++++++++++++++++++++++++++++++++++++++++++++" <<std::endl;
for(int d=0;d<Nd;d++) CommDim[d]= (mpi[d]/shm[d])>1 ? 1 : 0;
Dirichlet[0] = 0;
Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0];
Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1];
Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2];
Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3];
Benchmark(Ls,Dirichlet);
std::cout << "\n\n\n\n\n\n" <<std::endl;
std::cout << GridLogMessage<< "++++++++++++++++++++++++++++++++++++++++++++++++" <<std::endl;
std::cout << GridLogMessage<< " Testing without intranode communication " <<std::endl;
std::cout << GridLogMessage<< "++++++++++++++++++++++++++++++++++++++++++++++++" <<std::endl;
for(int d=0;d<Nd;d++) CommDim[d]= mpi[d]>1 ? 1 : 0;
Dirichlet[0] = 0;
Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0];
Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1];
Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2];
Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3];
Benchmark(Ls,Dirichlet);
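// Worked example (illustrative numbers): latt4 = 32.32.32.64, mpi = 2.2.2.2, shm = 2.2.2.1
//   "without internode" : CommDim = {0,0,0,1}, Dirichlet = {0,0,0,0,32}
//                         block = node-local extent, only inter-node traffic suppressed
//   "without intranode" : CommDim = {1,1,1,1}, Dirichlet = {0,16,16,16,32}
//                         block = rank-local extent, all MPI halo exchange suppressed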
Grid_finalize();
exit(0);
}
void Benchmark(int Ls, Coordinate Dirichlet)
{
Coordinate latt4 = GridDefaultLatt();
GridLogLayout();
long unsigned int single_site_flops = 8*Nc*(7+16*Nc);
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
@ -80,9 +138,9 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
GridParallelRNG RNG4(UGrid); RNG4.SeedUniqueString(std::string("The 4D RNG"));
std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
GridParallelRNG RNG5(FGrid); RNG5.SeedUniqueString(std::string("The 5D RNG"));
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
LatticeFermionF src (FGrid); random(RNG5,src);
#if 0
@ -100,7 +158,6 @@ int main (int argc, char ** argv)
src = src*N2;
#endif
LatticeFermionF result(FGrid); result=Zero();
LatticeFermionF ref(FGrid); ref=Zero();
LatticeFermionF tmp(FGrid);
@ -108,29 +165,31 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
LatticeGaugeFieldF Umu(UGrid);
LatticeGaugeFieldF UmuCopy(UGrid);
SU<Nc>::HotConfiguration(RNG4,Umu);
UmuCopy=Umu;
std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
#if 0
Umu=1.0;
for(int mu=0;mu<Nd;mu++){
LatticeColourMatrixF ttmp(UGrid);
ttmp = PeekIndex<LorentzIndex>(Umu,mu);
// if (mu !=2 ) ttmp = 0;
// ttmp = ttmp* pow(10.0,mu);
PokeIndex<LorentzIndex>(Umu,ttmp,mu);
}
std::cout << GridLogMessage << "Forced to diagonal " << std::endl;
#endif
////////////////////////////////////
// Apply BCs
////////////////////////////////////
Coordinate Block(4);
for(int d=0;d<4;d++) Block[d]= Dirichlet[d+1];
std::cout << GridLogMessage << "Applying BCs for Dirichlet Block5 " << Dirichlet << std::endl;
std::cout << GridLogMessage << "Applying BCs for Dirichlet Block4 " << Block << std::endl;
DirichletFilter<LatticeGaugeFieldF> Filter(Block);
Filter.applyFilter(Umu);
////////////////////////////////////
// Naive wilson implementation
////////////////////////////////////
// replicate across fifth dimension
// LatticeGaugeFieldF Umu5d(FGrid);
std::vector<LatticeColourMatrixF> U(4,UGrid);
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
}
std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl;
if (1)
@ -191,11 +250,13 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
DomainWallFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
Dw.DirichletBlock(Dirichlet);
Dw.ImportGauge(Umu);
int ncall =300;
if (1) {
FGrid->Barrier();
Dw.ZeroCounters();
Dw.Dhop(src,result,0);
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
double t0=usecond();
@ -220,29 +281,20 @@ int main (int argc, char ** argv)
double data_mem = (volume * (2*Nd+1)*Nd*Nc + (volume/Ls) *2*Nd*Nc*Nc) * simdwidth / nsimd * ncall / (1024.*1024.*1024.);
std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
// std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
// std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
std::cout<<GridLogMessage << "RF GiB/s (base 2) = "<< 1000000. * data_rf/((t1-t0))<<std::endl;
std::cout<<GridLogMessage << "mem GiB/s (base 2) = "<< 1000000. * data_mem/((t1-t0))<<std::endl;
// std::cout<<GridLogMessage << "RF GiB/s (base 2) = "<< 1000000. * data_rf/((t1-t0))<<std::endl;
// std::cout<<GridLogMessage << "mem GiB/s (base 2) = "<< 1000000. * data_mem/((t1-t0))<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
//exit(0);
if(( norm2(err)>1.0e-4) ) {
/*
std::cout << "RESULT\n " << result<<std::endl;
std::cout << "REF \n " << ref <<std::endl;
std::cout << "ERR \n " << err <<std::endl;
*/
std::cout<<GridLogMessage << "WRONG RESULT" << std::endl;
FGrid->Barrier();
exit(-1);
}
assert (norm2(err)< 1.0e-4 );
Dw.Report();
}
if (1)
@ -286,21 +338,20 @@ int main (int argc, char ** argv)
}
ref = -0.5*ref;
}
// dump=1;
Dw.Dhop(src,result,1);
Dw.Dhop(src,result,DaggerYes);
std::cout << GridLogMessage << "----------------------------------------------------------------" << std::endl;
std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl;
std::cout << GridLogMessage << "----------------------------------------------------------------" << std::endl;
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
std::cout<<GridLogMessage << "norm dag result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm dag ref "<< norm2(ref)<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm dag diff "<< norm2(err)<<std::endl;
if((norm2(err)>1.0e-4)){
/*
std::cout<< "DAG RESULT\n " <<ref << std::endl;
std::cout<< "DAG sRESULT\n " <<result << std::endl;
std::cout<< "DAG ERR \n " << err <<std::endl;
*/
}
assert((norm2(err)<1.0e-4));
LatticeFermionF src_e (FrbGrid);
LatticeFermionF src_o (FrbGrid);
LatticeFermionF r_e (FrbGrid);
@ -330,7 +381,6 @@ int main (int argc, char ** argv)
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
{
Dw.ZeroCounters();
FGrid->Barrier();
Dw.DhopEO(src_o,r_e,DaggerNo);
double t0=usecond();
@ -352,7 +402,6 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
Dw.Report();
}
Dw.DhopEO(src_o,r_e,DaggerNo);
Dw.DhopOE(src_e,r_o,DaggerNo);
@ -367,13 +416,7 @@ int main (int argc, char ** argv)
err = r_eo-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
if((norm2(err)>1.0e-4)){
/*
std::cout<< "Deo RESULT\n " <<r_eo << std::endl;
std::cout<< "Deo REF\n " <<result << std::endl;
std::cout<< "Deo ERR \n " << err <<std::endl;
*/
}
assert(norm2(err)<1.0e-4);
pickCheckerboard(Even,src_e,err);
pickCheckerboard(Odd,src_o,err);
@ -382,6 +425,4 @@ int main (int argc, char ** argv)
assert(norm2(src_e)<1.0e-4);
assert(norm2(src_o)<1.0e-4);
Grid_finalize();
exit(0);
}

View File

@ -0,0 +1,26 @@
#!/bin/bash
# Begin LSF Directives
#SBATCH -A LGT104
#SBATCH -t 01:00:00
##SBATCH -U openmpThu
#SBATCH -p ecp
#SBATCH -J comms
#SBATCH -o comms.%J
#SBATCH -e comms.%J
#SBATCH -N 1
#SBATCH -n 2
DIR=.
module list
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
export MPICH_GPU_SUPPORT_ENABLED=1
#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
#export MPICH_SMP_SINGLE_COPY_MODE=CMA
export MPICH_SMP_SINGLE_COPY_MODE=NONE
export OMP_NUM_THREADS=8
AT=8
echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE
PARAMS=" --accelerator-threads ${AT} --grid 64.64.32.32 --mpi 2.1.1.1 "
srun -n2 --label -c$OMP_NUM_THREADS --gpus-per-task=1 ./mpiwrapper.sh ./benchmarks/Benchmark_comms_host_device $PARAMS

View File

@ -5,6 +5,8 @@
--enable-gen-simd-width=64 \
--enable-simd=GPU \
--disable-fermion-reps \
--with-gmp=$OLCF_GMP_ROOT \
--with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \
--disable-gparity \
CXX=hipcc MPICXX=mpicxx \
CXXFLAGS="-fPIC -I/opt/rocm-4.5.0/include/ -std=c++14 -I${MPICH_DIR}/include " \

View File

@ -3,28 +3,28 @@
#SBATCH -A LGT104
#SBATCH -t 01:00:00
##SBATCH -U openmpThu
##SBATCH -p ecp
#SBATCH -J DWF
#SBATCH -o DWF.%J
#SBATCH -e DWF.%J
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -n 8
#SBATCH --exclusive
#SBATCH --gpu-bind=map_gpu:0,1,2,3,7,6,5,4
DIR=.
module list
#export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
export MPICH_GPU_SUPPORT_ENABLED=1
export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
#export MPICH_SMP_SINGLE_COPY_MODE=NONE
#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
export MPICH_SMP_SINGLE_COPY_MODE=NONE
#export MPICH_SMP_SINGLE_COPY_MODE=CMA
export OMP_NUM_THREADS=1
AT=8
echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE
PARAMS=" --accelerator-threads ${AT} --grid 24.24.24.24 --shm-mpi 0 --mpi 1.1.1.1"
srun --gpus-per-task 1 -n1 ./benchmarks/Benchmark_dwf_fp32 $PARAMS
PARAMS=" --accelerator-threads 16 --grid 32.32.32.256 --mpi 1.1.1.8 --comms-overlap --shm 2048 --shm-mpi 0"
echo $PARAMS
srun --gpus-per-task 1 -n8 ./benchmarks/Benchmark_dwf_fp32 $PARAMS

View File

@ -6,22 +6,43 @@
#SBATCH -J DWF
#SBATCH -o DWF.%J
#SBATCH -e DWF.%J
#SBATCH -N 1
#SBATCH -n 8
#SBATCH -N 8
#SBATCH -n 64
#SBATCH --exclusive
#SBATCH --gpu-bind=map_gpu:0,1,2,3,7,6,5,4
DIR=.
module list
export MPICH_OFI_NIC_POLICY=GPU
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
export MPICH_GPU_SUPPORT_ENABLED=1
export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
#export MPICH_SMP_SINGLE_COPY_MODE=NONE
#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
#export MPICH_SMP_SINGLE_COPY_MODE=CMA
export MPICH_SMP_SINGLE_COPY_MODE=NONE
export OMP_NUM_THREADS=1
echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE
PARAMS=" --accelerator-threads 8 --grid 32.64.64.64 --mpi 1.2.2.2 --comms-overlap --shm 2048 --shm-mpi 0"
srun --gpus-per-task 1 -n8 ./mpiwrapper.sh ./benchmarks/Benchmark_dwf_fp32 $PARAMS
PARAMS=" --accelerator-threads 16 --grid 64.64.64.256 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 0"
echo $PARAMS
#srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_dwf_fp32 $PARAMS > dwf.64.64.64.256.8node
PARAMS=" --accelerator-threads 16 --grid 64.64.64.32 --mpi 4.4.4.1 --comms-overlap --shm 2048 --shm-mpi 1"
echo $PARAMS
srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_dwf_fp32 $PARAMS > dwf.64.64.64.32.8node
PARAMS=" --accelerator-threads 16 --grid 64.64.64.32 --mpi 4.4.4.1 --comms-overlap --shm 2048 --shm-mpi 0"
echo $PARAMS
#srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_dwf_fp32 $PARAMS > dwf.64.64.64.32.8node.shm0
PARAMS=" --accelerator-threads 16 --grid 64.64.64.32 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 1"
echo $PARAMS
#srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_ITT $PARAMS > itt.8node
PARAMS=" --accelerator-threads 16 --grid 64.64.64.32 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 0"
echo $PARAMS
#srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_ITT $PARAMS > itt.8node_shm0

View File

@ -1,10 +1,11 @@
#!/bin/bash
lrank=$SLURM_LOCALID
lgpu=(0 1 2 3 7 6 5 4)
export ROCR_VISIBLE_DEVICES=$SLURM_LOCALID
export ROCR_VISIBLE_DEVICES=${lgpu[$lrank]}
echo "`hostname` - $lrank device=$ROCR_VISIBLE_DEVICES binding=$BINDING"
echo "`hostname` - $lrank device=$ROCR_VISIBLE_DEVICES "
$*

View File

@ -3,3 +3,4 @@ module load rocm/4.5.0
module load gmp
module load cray-fftw
module load craype-accel-amd-gfx90a
export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH

View File

@ -6,6 +6,8 @@
--enable-simd=GPU \
--disable-fermion-reps \
--disable-gparity \
--with-gmp=$OLCF_GMP_ROOT \
--with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \
CXX=hipcc MPICXX=mpicxx \
CXXFLAGS="-fPIC -I/opt/rocm-4.3.0/include/ -std=c++14 -I${MPICH_DIR}/include " \
--prefix=/ccs/home/chulwoo/Grid \

View File

@ -1,8 +1,7 @@
#!/bin/bash
# Begin LSF Directives
#SBATCH -A LGT104
#SBATCH -t 01:00:00
##SBATCH -U openmpThu
#SBATCH -t 3:00:00
#SBATCH -p ecp
#SBATCH -J DWF
#SBATCH -o DWF.%J
@ -14,13 +13,12 @@ DIR=.
module list
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
export MPICH_GPU_SUPPORT_ENABLED=1
#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
export MPICH_SMP_SINGLE_COPY_MODE=NONE
#export MPICH_SMP_SINGLE_COPY_MODE=CMA
export MPICH_SMP_SINGLE_COPY_MODE=CMA
export OMP_NUM_THREADS=8
AT=8
echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE
PARAMS=" --accelerator-threads ${AT} --grid 32.64.64.64 --mpi 1.2.2.2 --comms-overlap --shm 2048 --shm-mpi 0"
srun -n8 --label -c$OMP_NUM_THREADS --gpus-per-task=1 ./mpiwrapper.sh ./benchmarks/Benchmark_dwf_fp32 $PARAMS
PARAMS=" --accelerator-threads ${AT} --grid 16.16.16.48 --mpi 1.2.2.2 --comms-overlap --shm 2048 --shm-mpi 0"
srun -N2 -n8 --label -c$OMP_NUM_THREADS --gpus-per-task=1 ./mpiwrapper.sh ./HMC/Mobius2p1f_DD_RHMC $PARAMS

View File

@ -1,5 +1,9 @@
module load emacs
module load PrgEnv-gnu
module load rocm/4.3.0
module load rocm/4.5.0
module load gmp
module load cray-fftw
module load craype-accel-amd-gfx908
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
export MPICH_GPU_SUPPORT_ENABLED=1
export LD_LIBRARY_PATH=/opt/cray/pe/gcc/mpfr/3.1.4/lib/:$LD_LIBRARY_PATH

View File

@ -1,25 +1,25 @@
tu-c0r0n00 - 0 device=0 binding=--interleave=0,1
tu-c0r0n00 - 1 device=1 binding=--interleave=2,3
tu-c0r0n09 - 1 device=1 binding=--interleave=2,3
tu-c0r0n00 - 2 device=2 binding=--interleave=4,5
tu-c0r0n06 - 0 device=0 binding=--interleave=0,1
tu-c0r0n06 - 1 device=1 binding=--interleave=2,3
tu-c0r0n09 - 0 device=0 binding=--interleave=0,1
tu-c0r0n09 - 2 device=2 binding=--interleave=4,5
tu-c0r0n03 - 1 device=1 binding=--interleave=2,3
tu-c0r0n06 - 2 device=2 binding=--interleave=4,5
tu-c0r0n09 - 3 device=3 binding=--interleave=6,7
tu-c0r0n00 - 3 device=3 binding=--interleave=6,7
tu-c0r0n03 - 0 device=0 binding=--interleave=0,1
tu-c0r0n03 - 2 device=2 binding=--interleave=4,5
tu-c0r0n06 - 3 device=3 binding=--interleave=6,7
tu-c0r0n03 - 3 device=3 binding=--interleave=6,7
tu-c0r3n00 - 0 device=0 binding=--interleave=0,1
tu-c0r3n00 - 1 device=1 binding=--interleave=2,3
tu-c0r3n00 - 2 device=2 binding=--interleave=4,5
tu-c0r3n00 - 3 device=3 binding=--interleave=6,7
tu-c0r3n06 - 1 device=1 binding=--interleave=2,3
tu-c0r3n06 - 3 device=3 binding=--interleave=6,7
tu-c0r3n06 - 0 device=0 binding=--interleave=0,1
tu-c0r3n06 - 2 device=2 binding=--interleave=4,5
tu-c0r3n03 - 1 device=1 binding=--interleave=2,3
tu-c0r3n03 - 2 device=2 binding=--interleave=4,5
tu-c0r3n03 - 0 device=0 binding=--interleave=0,1
tu-c0r3n03 - 3 device=3 binding=--interleave=6,7
tu-c0r3n09 - 0 device=0 binding=--interleave=0,1
tu-c0r3n09 - 1 device=1 binding=--interleave=2,3
tu-c0r3n09 - 2 device=2 binding=--interleave=4,5
tu-c0r3n09 - 3 device=3 binding=--interleave=6,7
OPENMPI detected
AcceleratorCudaInit: using default device
AcceleratorCudaInit: assume user either uses a) IBM jsrun, or
AcceleratorCudaInit: assume user either uses
AcceleratorCudaInit: a) IBM jsrun, or
AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding
AcceleratorCudaInit: Configure options --enable-summit, --enable-select-gpu=no
AcceleratorCudaInit: ================================================
AcceleratorCudaInit: Configure options --enable-setdevice=no
OPENMPI detected
AcceleratorCudaInit[0]: ========================
AcceleratorCudaInit[0]: Device Number : 0
@ -33,11 +33,41 @@ AcceleratorCudaInit[0]: pciBusID: 3
AcceleratorCudaInit[0]: pciDeviceID: 0
AcceleratorCudaInit[0]: maxGridSize (2147483647,65535,65535)
AcceleratorCudaInit: using default device
AcceleratorCudaInit: assume user either uses a) IBM jsrun, or
AcceleratorCudaInit: assume user either uses
AcceleratorCudaInit: a) IBM jsrun, or
AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding
AcceleratorCudaInit: Configure options --enable-summit, --enable-select-gpu=no
AcceleratorCudaInit: ================================================
AcceleratorCudaInit: Configure options --enable-setdevice=no
OPENMPI detected
AcceleratorCudaInit: using default device
AcceleratorCudaInit: assume user either uses
AcceleratorCudaInit: a) IBM jsrun, or
AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding
AcceleratorCudaInit: Configure options --enable-setdevice=no
OPENMPI detected
AcceleratorCudaInit: using default device
AcceleratorCudaInit: assume user either uses
AcceleratorCudaInit: a) IBM jsrun, or
AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding
AcceleratorCudaInit: Configure options --enable-setdevice=no
OPENMPI detected
AcceleratorCudaInit: using default device
AcceleratorCudaInit: assume user either uses
AcceleratorCudaInit: a) IBM jsrun, or
AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding
AcceleratorCudaInit: Configure options --enable-setdevice=no
OPENMPI detected
OPENMPI detected
AcceleratorCudaInit: using default device
AcceleratorCudaInit: assume user either uses
AcceleratorCudaInit: a) IBM jsrun, or
AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding
AcceleratorCudaInit: Configure options --enable-setdevice=no
OPENMPI detected
AcceleratorCudaInit: using default device
AcceleratorCudaInit: assume user either uses
AcceleratorCudaInit: a) IBM jsrun, or
AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding
AcceleratorCudaInit: Configure options --enable-setdevice=no
AcceleratorCudaInit[0]: ========================
AcceleratorCudaInit[0]: Device Number : 0
AcceleratorCudaInit[0]: ========================
@ -50,43 +80,25 @@ AcceleratorCudaInit[0]: pciBusID: 3
AcceleratorCudaInit[0]: pciDeviceID: 0
AcceleratorCudaInit[0]: maxGridSize (2147483647,65535,65535)
AcceleratorCudaInit: using default device
AcceleratorCudaInit: assume user either uses a) IBM jsrun, or
AcceleratorCudaInit: assume user either uses
AcceleratorCudaInit: a) IBM jsrun, or
AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding
AcceleratorCudaInit: Configure options --enable-summit, --enable-select-gpu=no
AcceleratorCudaInit: Configure options --enable-setdevice=no
local rank 1 device 0 bus id: 0000:44:00.0
AcceleratorCudaInit: ================================================
OPENMPI detected
AcceleratorCudaInit: using default device
AcceleratorCudaInit: assume user either uses a) IBM jsrun, or
AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding
AcceleratorCudaInit: Configure options --enable-summit, --enable-select-gpu=no
local rank 0 device 0 bus id: 0000:03:00.0
AcceleratorCudaInit: ================================================
OPENMPI detected
AcceleratorCudaInit: using default device
AcceleratorCudaInit: assume user either uses a) IBM jsrun, or
AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding
AcceleratorCudaInit: Configure options --enable-summit, --enable-select-gpu=no
AcceleratorCudaInit: ================================================
OPENMPI detected
AcceleratorCudaInit: using default device
AcceleratorCudaInit: assume user either uses a) IBM jsrun, or
AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding
AcceleratorCudaInit: Configure options --enable-summit, --enable-select-gpu=no
AcceleratorCudaInit: ================================================
OPENMPI detected
AcceleratorCudaInit: using default device
AcceleratorCudaInit: assume user either uses a) IBM jsrun, or
AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding
AcceleratorCudaInit: Configure options --enable-summit, --enable-select-gpu=no
AcceleratorCudaInit: ================================================
OPENMPI detected
AcceleratorCudaInit: using default device
AcceleratorCudaInit: assume user either uses a) IBM jsrun, or
AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding
AcceleratorCudaInit: Configure options --enable-summit, --enable-select-gpu=no
AcceleratorCudaInit: ================================================
local rank 0 device 0 bus id: 0000:03:00.0
AcceleratorCudaInit: ================================================
AcceleratorCudaInit: ================================================
local rank 2 device 0 bus id: 0000:84:00.0
SharedMemoryMpi: World communicator of size 16
SharedMemoryMpi: Node communicator of size 4
0SharedMemoryMpi: SharedMemoryMPI.cc acceleratorAllocDevice 2147483648bytes at 0x7fcd80000000 for comms buffers
0SharedMemoryMpi: SharedMemoryMPI.cc acceleratorAllocDevice 2147483648bytes at 0x153960000000 for comms buffers
Setting up IPC
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
@ -116,7 +128,7 @@ This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Current Grid git commit hash=9d2238148c56e3fbadfa95dcabf2b83d4bde14cd: (HEAD -> develop) uncommited changes
Current Grid git commit hash=da06d15f73184ceb15d66d4e7e702b02fed7b940: (HEAD -> feature/dirichlet, develop) uncommited changes
Grid : Message : ================================================
Grid : Message : MPI is initialised and logging filters activated
@ -124,122 +136,102 @@ Grid : Message : ================================================
Grid : Message : Requested 2147483648 byte stencil comms buffers
Grid : Message : MemoryManager Cache 34004218675 bytes
Grid : Message : MemoryManager::Init() setting up
Grid : Message : MemoryManager::Init() cache pool for recent allocations: SMALL 32 LARGE 8
Grid : Message : MemoryManager::Init() cache pool for recent allocations: SMALL 8 LARGE 2
Grid : Message : MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory
Grid : Message : MemoryManager::Init() Using cudaMalloc
Grid : Message : 1.198523 s : Grid Layout
Grid : Message : 1.198530 s : Global lattice size : 64 64 64 64
Grid : Message : 1.198534 s : OpenMP threads : 4
Grid : Message : 1.198535 s : MPI tasks : 2 2 2 2
Grid : Message : 1.397615 s : Making s innermost grids
Grid : Message : 1.441828 s : Initialising 4d RNG
Grid : Message : 1.547973 s : Intialising parallel RNG with unique string 'The 4D RNG'
Grid : Message : 1.547998 s : Seed SHA256: 49db4542db694e3b1a74bf2592a8c1b83bfebbe18401693c2609a4c3af1
Grid : Message : 1.954777 s : Initialising 5d RNG
Grid : Message : 3.633825 s : Intialising parallel RNG with unique string 'The 5D RNG'
Grid : Message : 3.633869 s : Seed SHA256: b6316f2fac44ce14111f93e0296389330b077bfd0a7b359f781c58589f8a
Grid : Message : 12.162710 s : Initialised RNGs
Grid : Message : 15.882520 s : Drawing gauge field
Grid : Message : 15.816362 s : Random gauge initialised
Grid : Message : 17.279671 s : Setting up Cshift based reference
Grid : Message : 26.331426 s : *****************************************************************
Grid : Message : 26.331452 s : * Kernel options --dslash-generic, --dslash-unroll, --dslash-asm
Grid : Message : 26.331454 s : *****************************************************************
Grid : Message : 26.331456 s : *****************************************************************
Grid : Message : 26.331458 s : * Benchmarking DomainWallFermionR::Dhop
Grid : Message : 26.331459 s : * Vectorising space-time by 8
Grid : Message : 26.331463 s : * VComplexF size is 64 B
Grid : Message : 26.331465 s : * SINGLE precision
Grid : Message : 26.331467 s : * Using Overlapped Comms/Compute
Grid : Message : 26.331468 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 26.331469 s : *****************************************************************
Grid : Message : 28.413717 s : Called warmup
Grid : Message : 56.418423 s : Called Dw 3000 times in 2.80047e+07 us
Grid : Message : 56.418476 s : mflop/s = 3.79581e+07
Grid : Message : 56.418479 s : mflop/s per rank = 2.37238e+06
Grid : Message : 56.418481 s : mflop/s per node = 9.48953e+06
Grid : Message : 56.418483 s : RF GiB/s (base 2) = 77130
Grid : Message : 56.418485 s : mem GiB/s (base 2) = 48206.3
Grid : Message : 56.422076 s : norm diff 1.03481e-13
Grid : Message : 56.456894 s : #### Dhop calls report
Grid : Message : 56.456899 s : WilsonFermion5D Number of DhopEO Calls : 6002
Grid : Message : 56.456903 s : WilsonFermion5D TotalTime /Calls : 4710.93 us
Grid : Message : 56.456905 s : WilsonFermion5D CommTime /Calls : 3196.15 us
Grid : Message : 56.456908 s : WilsonFermion5D FaceTime /Calls : 494.392 us
Grid : Message : 56.456910 s : WilsonFermion5D ComputeTime1/Calls : 44.4107 us
Grid : Message : 56.456912 s : WilsonFermion5D ComputeTime2/Calls : 1037.75 us
Grid : Message : 56.456921 s : Average mflops/s per call : 3.55691e+09
Grid : Message : 56.456925 s : Average mflops/s per call per rank : 2.22307e+08
Grid : Message : 56.456928 s : Average mflops/s per call per node : 8.89228e+08
Grid : Message : 56.456930 s : Average mflops/s per call (full) : 3.82915e+07
Grid : Message : 56.456933 s : Average mflops/s per call per rank (full): 2.39322e+06
Grid : Message : 56.456952 s : Average mflops/s per call per node (full): 9.57287e+06
Grid : Message : 56.456954 s : WilsonFermion5D Stencil
Grid : Message : 56.457016 s : Stencil calls 3001
Grid : Message : 56.457022 s : Stencil halogtime 0
Grid : Message : 56.457024 s : Stencil gathertime 55.9154
Grid : Message : 56.457026 s : Stencil gathermtime 20.1073
Grid : Message : 56.457028 s : Stencil mergetime 18.5585
Grid : Message : 56.457030 s : Stencil decompresstime 0.0639787
Grid : Message : 56.457032 s : Stencil comms_bytes 4.02653e+08
Grid : Message : 56.457034 s : Stencil commtime 6379.93
Grid : Message : 56.457036 s : Stencil 63.1124 GB/s per rank
Grid : Message : 56.457038 s : Stencil 252.45 GB/s per node
Grid : Message : 56.457040 s : WilsonFermion5D StencilEven
Grid : Message : 56.457048 s : WilsonFermion5D StencilOdd
Grid : Message : 56.457062 s : WilsonFermion5D Stencil Reporti()
Grid : Message : 56.457065 s : WilsonFermion5D StencilEven Reporti()
Grid : Message : 56.457066 s : WilsonFermion5D StencilOdd Reporti()
Grid : Message : 79.259261 s : Compare to naive wilson implementation Dag to verify correctness
Grid : Message : 79.259287 s : Called DwDag
Grid : Message : 79.259288 s : norm dag result 12.0421
Grid : Message : 79.271740 s : norm dag ref 12.0421
Grid : Message : 79.287759 s : norm dag diff 7.63236e-14
Grid : Message : 79.328100 s : Calling Deo and Doe and //assert Deo+Doe == Dunprec
Grid : Message : 79.955951 s : src_e0.499997
Grid : Message : 80.633620 s : src_o0.500003
Grid : Message : 80.164163 s : *********************************************************
Grid : Message : 80.164168 s : * Benchmarking DomainWallFermionF::DhopEO
Grid : Message : 80.164170 s : * Vectorising space-time by 8
Grid : Message : 80.164172 s : * SINGLE precision
Grid : Message : 80.164174 s : * Using Overlapped Comms/Compute
Grid : Message : 80.164177 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 80.164178 s : *********************************************************
Grid : Message : 93.797635 s : Deo mflop/s = 3.93231e+07
Grid : Message : 93.797670 s : Deo mflop/s per rank 2.45769e+06
Grid : Message : 93.797672 s : Deo mflop/s per node 9.83077e+06
Grid : Message : 93.797674 s : #### Dhop calls report
Grid : Message : 93.797675 s : WilsonFermion5D Number of DhopEO Calls : 3001
Grid : Message : 93.797677 s : WilsonFermion5D TotalTime /Calls : 4542.83 us
Grid : Message : 93.797679 s : WilsonFermion5D CommTime /Calls : 2978.97 us
Grid : Message : 93.797681 s : WilsonFermion5D FaceTime /Calls : 602.287 us
Grid : Message : 93.797683 s : WilsonFermion5D ComputeTime1/Calls : 67.1416 us
Grid : Message : 93.797685 s : WilsonFermion5D ComputeTime2/Calls : 1004.07 us
Grid : Message : 93.797713 s : Average mflops/s per call : 3.30731e+09
Grid : Message : 93.797717 s : Average mflops/s per call per rank : 2.06707e+08
Grid : Message : 93.797719 s : Average mflops/s per call per node : 8.26827e+08
Grid : Message : 93.797721 s : Average mflops/s per call (full) : 3.97084e+07
Grid : Message : 93.797727 s : Average mflops/s per call per rank (full): 2.48178e+06
Grid : Message : 93.797732 s : Average mflops/s per call per node (full): 9.92711e+06
Grid : Message : 93.797735 s : WilsonFermion5D Stencil
Grid : Message : 93.797746 s : WilsonFermion5D StencilEven
Grid : Message : 93.797758 s : WilsonFermion5D StencilOdd
Grid : Message : 93.797769 s : Stencil calls 3001
Grid : Message : 93.797773 s : Stencil halogtime 0
Grid : Message : 93.797776 s : Stencil gathertime 56.7458
Grid : Message : 93.797780 s : Stencil gathermtime 22.6504
Grid : Message : 93.797782 s : Stencil mergetime 21.1913
Grid : Message : 93.797786 s : Stencil decompresstime 0.0556481
Grid : Message : 93.797788 s : Stencil comms_bytes 2.01327e+08
Grid : Message : 93.797791 s : Stencil commtime 2989.33
Grid : Message : 93.797795 s : Stencil 67.3484 GB/s per rank
Grid : Message : 93.797798 s : Stencil 269.394 GB/s per node
Grid : Message : 93.797801 s : WilsonFermion5D Stencil Reporti()
Grid : Message : 93.797803 s : WilsonFermion5D StencilEven Reporti()
Grid : Message : 93.797805 s : WilsonFermion5D StencilOdd Reporti()
Grid : Message : 93.873429 s : r_e6.02111
Grid : Message : 93.879931 s : r_o6.02102
Grid : Message : 93.885912 s : res12.0421
Grid : Message : 94.876555 s : norm diff 0
Grid : Message : 95.485643 s : norm diff even 0
Grid : Message : 95.581236 s : norm diff odd 0
Grid : Message : 1.875883 s : Grid Layout
Grid : Message : 1.875893 s : Global lattice size : 64 64 64 64
Grid : Message : 1.875897 s : OpenMP threads : 4
Grid : Message : 1.875898 s : MPI tasks : 2 2 2 2
Grid : Message : 1.993571 s : Initialising 4d RNG
Grid : Message : 2.881990 s : Intialising parallel RNG with unique string 'The 4D RNG'
Grid : Message : 2.882370 s : Seed SHA256: 49db4542db694e3b1a74bf2592a8c1b83bfebbe18401693c2609a4c3af1
Grid : Message : 2.495044 s : Initialising 5d RNG
Grid : Message : 4.120900 s : Intialising parallel RNG with unique string 'The 5D RNG'
Grid : Message : 4.121350 s : Seed SHA256: b6316f2fac44ce14111f93e0296389330b077bfd0a7b359f781c58589f8a
Grid : Message : 15.268010 s : Drawing gauge field
Grid : Message : 16.234025 s : Random gauge initialised
Grid : Message : 16.234057 s : Applying BCs
Grid : Message : 16.365565 s : Setting up Cshift based reference
Grid : Message : 44.512418 s : *****************************************************************
Grid : Message : 44.512448 s : * Kernel options --dslash-generic, --dslash-unroll, --dslash-asm
Grid : Message : 44.512450 s : *****************************************************************
Grid : Message : 44.512451 s : *****************************************************************
Grid : Message : 44.512452 s : * Benchmarking DomainWallFermionR::Dhop
Grid : Message : 44.512453 s : * Vectorising space-time by 8
Grid : Message : 44.512454 s : * VComplexF size is 64 B
Grid : Message : 44.512456 s : * SINGLE precision
Grid : Message : 44.512459 s : * Using Overlapped Comms/Compute
Grid : Message : 44.512460 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 44.512461 s : *****************************************************************
Grid : Message : 46.389070 s : Called warmup
Grid : Message : 49.211265 s : Called Dw 300 times in 2.82203e+06 us
Grid : Message : 49.211295 s : mflop/s = 3.76681e+07
Grid : Message : 49.211297 s : mflop/s per rank = 2.35425e+06
Grid : Message : 49.211299 s : mflop/s per node = 9.41702e+06
Grid : Message : 49.211301 s : RF GiB/s (base 2) = 76540.6
Grid : Message : 49.211308 s : mem GiB/s (base 2) = 47837.9
Grid : Message : 49.214868 s : norm diff 1.06409e-13
Grid : Message : 92.647781 s : Compare to naive wilson implementation Dag to verify correctness
Grid : Message : 92.647816 s : Called DwDag
Grid : Message : 92.647817 s : norm dag result 12.0421
Grid : Message : 92.801806 s : norm dag ref 12.0421
Grid : Message : 92.817724 s : norm dag diff 7.21921e-14
Grid : Message : 92.858973 s : Calling Deo and Doe and //assert Deo+Doe == Dunprec
Grid : Message : 93.210378 s : src_e0.499997
Grid : Message : 93.583286 s : src_o0.500003
Grid : Message : 93.682468 s : *********************************************************
Grid : Message : 93.682471 s : * Benchmarking DomainWallFermionF::DhopEO
Grid : Message : 93.682472 s : * Vectorising space-time by 8
Grid : Message : 93.682473 s : * SINGLE precision
Grid : Message : 93.682475 s : * Using Overlapped Comms/Compute
Grid : Message : 93.682476 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 93.682477 s : *********************************************************
Grid : Message : 95.162342 s : Deo mflop/s = 3.92487e+07
Grid : Message : 95.162387 s : Deo mflop/s per rank 2.45305e+06
Grid : Message : 95.162389 s : Deo mflop/s per node 9.81219e+06
Grid : Message : 95.232801 s : r_e6.02111
Grid : Message : 95.240061 s : r_o6.02102
Grid : Message : 95.245975 s : res12.0421
Grid : Message : 95.833402 s : norm diff 0
Grid : Message : 96.573829 s : norm diff even 0
Grid : Message : 96.868272 s : norm diff odd 0
Dirichlet block [0 64 64 32 32]
Grid : Message : 97.756909 s : Grid Layout
Grid : Message : 97.756911 s : Global lattice size : 64 64 64 64
Grid : Message : 97.756921 s : OpenMP threads : 4
Grid : Message : 97.756922 s : MPI tasks : 2 2 2 2
Grid : Message : 97.897085 s : Initialising 4d RNG
Grid : Message : 97.965061 s : Intialising parallel RNG with unique string 'The 4D RNG'
Grid : Message : 97.965097 s : Seed SHA256: 49db4542db694e3b1a74bf2592a8c1b83bfebbe18401693c2609a4c3af1
Grid : Message : 98.367431 s : Initialising 5d RNG
Grid : Message : 99.752745 s : Intialising parallel RNG with unique string 'The 5D RNG'
Grid : Message : 99.752790 s : Seed SHA256: b6316f2fac44ce14111f93e0296389330b077bfd0a7b359f781c58589f8a
Grid : Message : 111.290148 s : Drawing gauge field
Grid : Message : 112.349289 s : Random gauge initialised
Grid : Message : 112.349320 s : Applying BCs
Grid : Message : 113.948740 s : Setting up Cshift based reference
Grid : Message : 140.320415 s : *****************************************************************
Grid : Message : 140.320443 s : * Kernel options --dslash-generic, --dslash-unroll, --dslash-asm
Grid : Message : 140.320444 s : *****************************************************************
Grid : Message : 140.320445 s : *****************************************************************
Grid : Message : 140.320446 s : * Benchmarking DomainWallFermionR::Dhop
Grid : Message : 140.320447 s : * Vectorising space-time by 8
Grid : Message : 140.320448 s : * VComplexF size is 64 B
Grid : Message : 140.320450 s : * SINGLE precision
Grid : Message : 140.320451 s : * Using Overlapped Comms/Compute
Grid : Message : 140.320452 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 140.320453 s : *****************************************************************
Grid : Message : 142.296150 s : Called warmup
Grid : Message : 144.397678 s : Called Dw 300 times in 2.36719e+06 us
Grid : Message : 144.397700 s : mflop/s = 4.49058e+07
Grid : Message : 144.397702 s : mflop/s per rank = 2.80661e+06
Grid : Message : 144.397704 s : mflop/s per node = 1.12265e+07
Grid : Message : 144.397706 s : RF GiB/s (base 2) = 91247.6
Grid : Message : 144.397708 s : mem GiB/s (base 2) = 57029.7
Grid : Message : 144.401269 s : norm diff 9.78944e-14
Grid : Message : 186.885460 s : Compare to naive wilson implementation Dag to verify correctness
Grid : Message : 186.885492 s : Called DwDag
Grid : Message : 186.885493 s : norm dag result 10.4157
Grid : Message : 186.897154 s : norm dag ref 11.2266
Grid : Message : 186.912538 s : norm dag diff 0.484633

View File

@ -1,14 +1,13 @@
#!/bin/bash
#SBATCH -J dslash
#SBATCH -A tc002
#SBATCH -t 2:20:00
#SBATCH --nodelist=tu-c0r0n[00,03,06,09]
#SBATCH -A dp207
#SBATCH --exclusive
#SBATCH --nodes=4
#SBATCH --ntasks=16
#SBATCH --qos=standard
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
#SBATCH --time=12:00:00
#SBATCH --time=0:05:00
#SBATCH --partition=gpu
#SBATCH --gres=gpu:4
#SBATCH --output=%x.%j.out

View File

@ -0,0 +1 @@
CXX=mpicxx-openmpi-mp CXXFLAGS=-I/opt/local/include/ LDFLAGS=-L/opt/local/lib/ ../../configure --enable-simd=GEN --enable-debug --enable-comms=mpi

View File

@ -55,6 +55,7 @@ static_assert(same_vComplex == 1, "Dirac Operators must have same underlying SIM
int main (int argc, char ** argv)
{
int nu = 0;
int tbc_aprd = 0; //use antiperiodic BCs in the time direction?
Grid_init(&argc,&argv);
@ -62,6 +63,9 @@ int main (int argc, char ** argv)
if(std::string(argv[i]) == "--Gparity-dir"){
std::stringstream ss; ss << argv[i+1]; ss >> nu;
std::cout << GridLogMessage << "Set Gparity direction to " << nu << std::endl;
}else if(std::string(argv[i]) == "--Tbc-APRD"){
tbc_aprd = 1;
std::cout << GridLogMessage << "Using antiperiodic BCs in the time direction" << std::endl;
}
}
@ -155,13 +159,18 @@ int main (int argc, char ** argv)
//Coordinate grid for reference
LatticeInteger xcoor_1f5(FGrid_1f);
LatticeCoordinate(xcoor_1f5,1+nu);
LatticeCoordinate(xcoor_1f5,1+nu); //note '1+nu'! This is because for 5D fields the s-direction is direction 0
Replicate(src,src_1f);
src_1f = where( xcoor_1f5 >= Integer(L), 2.0*src_1f,src_1f );
RealD mass=0.0;
RealD M5=1.8;
StandardDiracOp Ddwf(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f,mass,M5 DOP_PARAMS);
//Standard Dirac op
AcceleratorVector<Complex,4> bc_std(Nd, 1.0);
if(tbc_aprd) bc_std[Nd-1] = -1.; //antiperiodic time BC
StandardDiracOp::ImplParams std_params(bc_std);
StandardDiracOp Ddwf(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f,mass,M5 DOP_PARAMS, std_params);
StandardFermionField src_o_1f(FrbGrid_1f);
StandardFermionField result_o_1f(FrbGrid_1f);
@ -172,9 +181,11 @@ int main (int argc, char ** argv)
ConjugateGradient<StandardFermionField> CG(1.0e-8,10000);
CG(HermOpEO,src_o_1f,result_o_1f);
// const int nu = 3;
//Gparity Dirac op
std::vector<int> twists(Nd,0);
twists[nu] = 1;
if(tbc_aprd) twists[Nd-1] = 1;
GparityDiracOp::ImplParams params;
params.twists = twists;
GparityDiracOp GPDdwf(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f,*UrbGrid_2f,mass,M5 DOP_PARAMS,params);
@ -271,8 +282,11 @@ int main (int argc, char ** argv)
std::cout << "2f cb "<<result_o_2f.Checkerboard()<<std::endl;
std::cout << "1f cb "<<result_o_1f.Checkerboard()<<std::endl;
std::cout << " result norms " <<norm2(result_o_2f)<<" " <<norm2(result_o_1f)<<std::endl;
//Compare norms
std::cout << " result norms 2f: " <<norm2(result_o_2f)<<" 1f: " <<norm2(result_o_1f)<<std::endl;
//Take the 2f solution and convert into the corresponding 1f solution (odd cb only)
StandardFermionField res0o (FrbGrid_2f);
StandardFermionField res1o (FrbGrid_2f);
StandardFermionField res0 (FGrid_2f);
@ -281,12 +295,13 @@ int main (int argc, char ** argv)
res0=Zero();
res1=Zero();
res0o = PeekIndex<0>(result_o_2f,0);
res1o = PeekIndex<0>(result_o_2f,1);
res0o = PeekIndex<0>(result_o_2f,0); //flavor 0, odd cb
res1o = PeekIndex<0>(result_o_2f,1); //flavor 1, odd cb
std::cout << "res cb "<<res0o.Checkerboard()<<std::endl;
std::cout << "res cb "<<res1o.Checkerboard()<<std::endl;
//poke odd onto non-cb field
setCheckerboard(res0,res0o);
setCheckerboard(res1,res1o);
@ -296,12 +311,13 @@ int main (int argc, char ** argv)
Replicate(res0,replica0);
Replicate(res1,replica1);
//2nd half of doubled lattice has f=1
replica = where( xcoor_1f5 >= Integer(L), replica1,replica0 );
replica0 = Zero();
setCheckerboard(replica0,result_o_1f);
std::cout << "Norm2 solutions is " <<norm2(replica)<<" "<< norm2(replica0)<<std::endl;
std::cout << "Norm2 solutions 1f reconstructed from 2f: " <<norm2(replica)<<" Actual 1f: "<< norm2(replica0)<<std::endl;
replica = replica - replica0;
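For reference, the new --Tbc-APRD flag parsed near the top of this file switches both the standard and G-parity operators to antiperiodic time boundary conditions (via bc_std[Nd-1] = -1 and twists[Nd-1] = 1 respectively), while --Gparity-dir selects the G-parity direction. A hypothetical invocation, with the lattice and MPI layout purely illustrative:

./Test_gparity --grid 8.8.8.8 --mpi 1.1.1.1 --Gparity-dir 0 --Tbc-APRD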

View File

@ -0,0 +1,177 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_gparity_flavour.cc
Copyright (C) 2015-2017
Author: Christopher Kelly <ckelly@bnl.gov>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
static constexpr double tolerance = 1.0e-6;
static std::array<GparityFlavourMatrix, GparityFlavour::nSigma> testAlgebra;
void print(const GparityFlavourMatrix &g)
{
for(int i = 0; i < Ngp; i++)
{
std::cout << GridLogMessage << "(";
for(int j=0;j<Ngp;j++){
if ( abs( g(i,j)()() ) == 0 ) {
std::cout<< " 0";
} else if ( abs(g(i,j)()() - Complex(0,1)) == 0){
std::cout<< " i";
} else if ( abs(g(i,j)()() + Complex(0,1)) == 0){
std::cout<< "-i";
} else if ( abs(g(i,j)()() - Complex(1,0)) == 0){
std::cout<< " 1";
} else if ( abs(g(i,j)()() + Complex(1,0)) == 0){
std::cout<< "-1";
}
std::cout<<((j == Ngp-1) ? ")" : "," );
}
std::cout << std::endl;
}
std::cout << GridLogMessage << std::endl;
}
void createTestAlgebra(void)
{
std::array<GparityFlavourMatrix, 3> testg;
const Complex I(0., 1.), mI(0., -1.);
// 0 1
// 1 0
testg[0] = Zero();
testg[0](0, 1)()() = 1.;
testg[0](1, 0)()() = 1.;
std::cout << GridLogMessage << "test SigmaX= " << std::endl;
print(testg[0]);
// 0 -i
// i 0
testg[1] = Zero();
testg[1](0, 1)()() = mI;
testg[1](1, 0)()() = I;
std::cout << GridLogMessage << "test SigmaY= " << std::endl;
print(testg[1]);
// 1 0
// 0 -1
testg[2] = Zero();
testg[2](0, 0)()() = 1.0;
testg[2](1, 1)()() = -1.0;
std::cout << GridLogMessage << "test SigmaZ= " << std::endl;
print(testg[2]);
#define DEFINE_TEST_G(g, exp)\
testAlgebra[GparityFlavour::Algebra::g] = exp; \
testAlgebra[GparityFlavour::Algebra::Minus##g] = -exp;
DEFINE_TEST_G(SigmaX , testg[0]);
DEFINE_TEST_G(SigmaY , testg[1]);
DEFINE_TEST_G(SigmaZ , testg[2]);
DEFINE_TEST_G(Identity , 1.);
GparityFlavourMatrix pplus;
pplus = 1.0;
pplus = pplus + testg[1];
pplus = pplus * 0.5;
DEFINE_TEST_G(ProjPlus , pplus);
GparityFlavourMatrix pminus;
pminus = 1.0;
pminus = pminus - testg[1];
pminus = pminus * 0.5;
DEFINE_TEST_G(ProjMinus , pminus);
#undef DEFINE_TEST_G
}
template <typename Expr>
void test(const Expr &a, const Expr &b)
{
if (norm2(a - b) < tolerance)
{
std::cout << "[OK] ";
}
else
{
std::cout << "[fail]" << std::endl;
std::cout << GridLogError << "a= " << a << std::endl;
std::cout << GridLogError << "is different (tolerance= " << tolerance << ") from " << std::endl;
std::cout << GridLogError << "b= " << b << std::endl;
exit(EXIT_FAILURE);
}
}
void checkSigma(const GparityFlavour::Algebra a, GridSerialRNG &rng)
{
GparityFlavourVector v;
GparityFlavourMatrix m, &testg = testAlgebra[a];
GparityFlavour g(a);
random(rng, v);
random(rng, m);
std::cout << GridLogMessage << "Checking " << GparityFlavour::name[a] << ": ";
std::cout << "vecmul ";
test(g*v, testg*v);
std::cout << "matlmul ";
test(g*m, testg*m);
std::cout << "matrmul ";
test(m*g, m*testg);
std::cout << std::endl;
}
int main(int argc, char *argv[])
{
Grid_init(&argc,&argv);
Coordinate latt_size = GridDefaultLatt();
Coordinate simd_layout = GridDefaultSimd(4,vComplex::Nsimd());
Coordinate mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridSerialRNG sRNG;
sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
std::cout << GridLogMessage << "======== Test algebra" << std::endl;
createTestAlgebra();
std::cout << GridLogMessage << "======== Multiplication operators check" << std::endl;
for (int i = 0; i < GparityFlavour::nSigma; ++i)
{
checkSigma(i, sRNG);
}
std::cout << GridLogMessage << std::endl;
Grid_finalize();
return EXIT_SUCCESS;
}
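For reference, the matrices built in createTestAlgebra are the flavour-space Pauli matrices (SigmaX, SigmaY, SigmaZ), and the two projectors are formed from SigmaY (the matrix stored in testg[1]) as

P_+ = (1 + sigma_Y) / 2,   P_- = (1 - sigma_Y) / 2,

which is exactly what the pplus/pminus blocks compute before registering them as ProjPlus/ProjMinus.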

View File

@ -71,26 +71,14 @@ int main (int argc, char ** argv)
////////////////////////////////////
RealD mass=0.2; //kills the diagonal term
RealD M5=1.8;
// const int nu = 3;
// std::vector<int> twists(Nd,0); // twists[nu] = 1;
// GparityDomainWallFermionR::ImplParams params; params.twists = twists;
// GparityDomainWallFermionR Ddwf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
// DomainWallFermionR Dw (U, Grid,RBGrid,mass,M5);
const int nu = 3;
const int nu = 0; //gparity direction
std::vector<int> twists(Nd,0);
twists[nu] = 1;
twists[Nd-1] = 1; //antiperiodic in time
GparityDomainWallFermionR::ImplParams params;
params.twists = twists;
/*
params.boundary_phases[0] = 1.0;
params.boundary_phases[1] = 1.0;
params.boundary_phases[2] = 1.0;
params.boundary_phases[3] =- 1.0;
*/
GparityDomainWallFermionR Dw(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
Dw.M (phi,Mphi);

View File

@ -71,8 +71,10 @@ int main (int argc, char ** argv)
RealD mass=0.01;
RealD M5=1.8;
const int nu = 3;
std::vector<int> twists(Nd,0); twists[nu] = 1;
const int nu = 1;
std::vector<int> twists(Nd,0);
twists[nu] = 1;
twists[3] = 1;
GparityDomainWallFermionR::ImplParams params; params.twists = twists;
GparityDomainWallFermionR Ddwf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
Ddwf.M (phi,Mphi);

View File

@ -64,8 +64,12 @@ int main (int argc, char ** argv)
////////////////////////////////////
RealD mass=0.01;
const int nu = 3;
std::vector<int> twists(Nd,0); twists[nu] = 1;
const int nu = 1;
const int Lnu=latt_size[nu];
std::vector<int> twists(Nd,0);
twists[nu] = 1;
twists[3]=1;
GparityWilsonFermionR::ImplParams params; params.twists = twists;
GparityWilsonFermionR Wil(U,*UGrid,*UrbGrid,mass,params);
Wil.M (phi,Mphi);

View File

@ -31,14 +31,38 @@ using namespace std;
using namespace Grid;
;
typedef typename GparityDomainWallFermionR::FermionField FermionField;
template<typename Action>
struct Setup{};
RealD AllZero(RealD x){ return 0.;}
template<>
struct Setup<GparityMobiusFermionR>{
static GparityMobiusFermionR* getAction(LatticeGaugeField &Umu,
GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){
RealD mass=0.01;
RealD M5=1.8;
RealD mob_b=1.5;
GparityMobiusFermionD ::ImplParams params;
std::vector<int> twists({1,1,1,0});
params.twists = twists;
return new GparityMobiusFermionR(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params);
}
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
template<>
struct Setup<DomainWallFermionR>{
static DomainWallFermionR* getAction(LatticeGaugeField &Umu,
GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){
RealD mass=0.01;
RealD M5=1.8;
return new DomainWallFermionR(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
}
};
template<typename Action>
void run(){
typedef typename Action::FermionField FermionField;
const int Ls=8;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
@ -56,24 +80,10 @@ int main (int argc, char ** argv)
LatticeGaugeField Umu(UGrid);
SU<Nc>::HotConfiguration(RNG4, Umu);
std::vector<LatticeColourMatrix> U(4,UGrid);
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
}
Action *action = Setup<Action>::getAction(Umu,FGrid,FrbGrid,UGrid,UrbGrid);
RealD mass=0.01;
RealD M5=1.8;
RealD mob_b=1.5;
// DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
GparityMobiusFermionD ::ImplParams params;
std::vector<int> twists({1,1,1,0});
params.twists = twists;
GparityMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params);
// MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
// SchurDiagTwoOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
SchurDiagTwoOperator<GparityMobiusFermionR,FermionField> HermOp(Ddwf);
// SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
//MdagMLinearOperator<Action,FermionField> HermOp(Ddwf);
SchurDiagTwoOperator<Action,FermionField> HermOp(*action);
const int Nstop = 30;
const int Nk = 40;
@ -91,7 +101,6 @@ int main (int argc, char ** argv)
ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt);
std::vector<RealD> eval(Nm);
FermionField src(FrbGrid);
gaussian(RNG5rb,src);
@ -103,6 +112,28 @@ int main (int argc, char ** argv)
int Nconv;
IRL.calc(eval,evec,src,Nconv);
delete action;
}
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::string action = "GparityMobius";
for(int i=1;i<argc;i++){
if(std::string(argv[i]) == "-action"){
action = argv[i+1];
}
}
if(action == "GparityMobius"){
run<GparityMobiusFermionR>();
}else if(action == "DWF"){
run<DomainWallFermionR>();
}else{
std::cout << "Unknown action" << std::endl;
exit(1);
}
Grid_finalize();
}
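With the Setup<Action> refactor above, the fermion action is now chosen at run time through the -action flag (default GparityMobius, alternative DWF). A hypothetical invocation for the regular domain wall path, with lattice and MPI sizes purely illustrative:

./Test_dwf_lanczos --grid 16.16.16.16 --mpi 1.1.1.1 -action DWF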

View File

@ -0,0 +1,184 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_multishift_mixedprec.cc
Copyright (C) 2015
Author: Christopher Kelly <ckelly@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
template<typename SpeciesD, typename SpeciesF, typename GaugeStatisticsType>
void run_test(int argc, char ** argv, const typename SpeciesD::ImplParams &params){
const int Ls = 16;
GridCartesian* UGrid_d = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexD::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian* UrbGrid_d = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_d);
GridCartesian* FGrid_d = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid_d);
GridRedBlackCartesian* FrbGrid_d = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid_d);
GridCartesian* UGrid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian* UrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_f);
GridCartesian* FGrid_f = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid_f);
GridRedBlackCartesian* FrbGrid_f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid_f);
typedef typename SpeciesD::FermionField FermionFieldD;
typedef typename SpeciesF::FermionField FermionFieldF;
std::vector<int> seeds4({1, 2, 3, 4});
std::vector<int> seeds5({5, 6, 7, 8});
GridParallelRNG RNG5(FGrid_d);
RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid_d);
RNG4.SeedFixedIntegers(seeds4);
FermionFieldD src_d(FGrid_d);
random(RNG5, src_d);
LatticeGaugeFieldD Umu_d(UGrid_d);
//CPS-created G-parity ensembles have a factor of 2 error in the plaquette that causes the read to fail unless we workaround it
bool gparity_plaquette_fix = false;
for(int i=1;i<argc;i++){
if(std::string(argv[i]) == "--gparity_plaquette_fix"){
gparity_plaquette_fix=true;
break;
}
}
bool cfg_loaded=false;
for(int i=1;i<argc;i++){
if(std::string(argv[i]) == "--load_config"){
assert(i != argc-1);
std::string file = argv[i+1];
NerscIO io;
FieldMetaData metadata;
if(gparity_plaquette_fix) NerscIO::exitOnReadPlaquetteMismatch() = false;
io.readConfiguration<GaugeStatisticsType>(Umu_d, metadata, file);
if(gparity_plaquette_fix){
metadata.plaquette *= 2.; //correct header value
//Get the true plaquette
FieldMetaData tmp;
GaugeStatisticsType gs; gs(Umu_d, tmp);
std::cout << "After correction: plaqs " << tmp.plaquette << " " << metadata.plaquette << std::endl;
assert(fabs(tmp.plaquette -metadata.plaquette ) < 1.0e-5 );
}
cfg_loaded=true;
break;
}
}
if(!cfg_loaded)
SU<Nc>::HotConfiguration(RNG4, Umu_d);
LatticeGaugeFieldF Umu_f(UGrid_f);
precisionChange(Umu_f, Umu_d);
std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl;
RealD mass = 0.01;
RealD M5 = 1.8;
SpeciesD Ddwf_d(Umu_d, *FGrid_d, *FrbGrid_d, *UGrid_d, *UrbGrid_d, mass, M5, params);
SpeciesF Ddwf_f(Umu_f, *FGrid_f, *FrbGrid_f, *UGrid_f, *UrbGrid_f, mass, M5, params);
FermionFieldD src_o_d(FrbGrid_d);
pickCheckerboard(Odd, src_o_d, src_d);
SchurDiagMooeeOperator<SpeciesD, FermionFieldD> HermOpEO_d(Ddwf_d);
SchurDiagMooeeOperator<SpeciesF, FermionFieldF> HermOpEO_f(Ddwf_f);
AlgRemez remez(1e-4, 64, 50);
int order = 15;
remez.generateApprox(order, 1, 2); //sqrt
MultiShiftFunction shifts(remez, 1e-10, false);
int relup_freq = 50;
double t1=usecond();
ConjugateGradientMultiShiftMixedPrec<FermionFieldD,FermionFieldF> mcg(10000, shifts, FrbGrid_f, HermOpEO_f, relup_freq);
std::vector<FermionFieldD> results_o_d(order, FrbGrid_d);
mcg(HermOpEO_d, src_o_d, results_o_d);
double t2=usecond();
//Crosscheck double and mixed prec results
ConjugateGradientMultiShift<FermionFieldD> dmcg(10000, shifts);
std::vector<FermionFieldD> results_o_d_2(order, FrbGrid_d);
dmcg(HermOpEO_d, src_o_d, results_o_d_2);
double t3=usecond();
std::cout << GridLogMessage << "Comparison of mixed prec results to double prec results |mixed - double|^2 :" << std::endl;
FermionFieldD tmp(FrbGrid_d);
for(int i=0;i<order;i++){
RealD ndiff = axpy_norm(tmp, -1., results_o_d[i], results_o_d_2[i]);
std::cout << i << " " << ndiff << std::endl;
}
std::cout<<GridLogMessage << "Mixed precision algorithm: Total usec = "<< (t2-t1)<<std::endl;
std::cout<<GridLogMessage << "Double precision algorithm: Total usec = "<< (t3-t2)<<std::endl;
}
int main (int argc, char ** argv)
{
Grid_init(&argc, &argv);
bool gparity = false;
int gpdir;
for(int i=1;i<argc;i++){
std::string arg(argv[i]);
if(arg == "--Gparity"){
assert(i!=argc-1);
gpdir = std::stoi(argv[i+1]);
assert(gpdir >= 0 && gpdir <= 2); //spatial!
gparity = true;
}
}
if(gparity){
std::cout << "Running test with G-parity BCs in " << gpdir << " direction" << std::endl;
GparityWilsonImplParams params;
params.twists[gpdir] = 1;
std::vector<int> conj_dirs(Nd,0);
conj_dirs[gpdir] = 1;
ConjugateGimplD::setDirections(conj_dirs);
run_test<GparityDomainWallFermionD, GparityDomainWallFermionF, ConjugateGaugeStatistics>(argc,argv,params);
}else{
std::cout << "Running test with periodic BCs" << std::endl;
WilsonImplParams params;
run_test<DomainWallFermionD, DomainWallFermionF, PeriodicGaugeStatistics>(argc,argv,params);
}
Grid_finalize();
}
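The option parsing in main chooses between the periodic and G-parity variants of the test, and run_test additionally honours --load_config (read an existing NERSC-format configuration) and --gparity_plaquette_fix (work around the factor-of-two plaquette error in old CPS-generated G-parity headers). A hypothetical invocation, with the direction, sizes and file name purely illustrative:

./Test_dwf_multishift_mixedprec --grid 16.16.16.32 --mpi 1.1.1.1 --Gparity 1 --load_config ckpoint_lat.1000 --gparity_plaquette_fix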