mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-03 18:55:56 +01:00
Imported changes from feature/gparity_HMC branch:
Added storage of final true residual in mixed-prec CG and enhanced log output Fixed const correctness of multi-shift constructor Added a mixed precision variant of the multi-shift algorithm that uses a single precision operator and applies periodic reliable update to the residual Added tests/solver/Test_dwf_multishift_mixedprec to test the above Fixed local coherence lanczos using the (large!) max approx to the chebyshev eval as the scale from which to judge the quality of convergence, resulting a test that always passes Added a method to local coherence lanczos class that returns the fine eval/evec pair Added iterative log output to power method Added optional disabling of the plaquette check in Nerscio to support loading old G-parity configs which have a factor of 2 error in the plaquette G-parity Dirac op no longer allows GPBC in the time direction; instead we toggle between periodic and antiperiodic Replaced thread_for G-parity 5D force insertion implementation with accelerator_for version capable of running on GPUs Generalized tests/lanczos/Test_dwf_lanczos to support regular DWF as well as Gparity, with the action chosen by a command line option Modified tests/forces/Test_dwf_gpforce,Test_gpdwf_force,Test_gpwilson_force to use GPBC a spatial direction rather than the t-direction, and antiperiodic BCs for time direction tests/core/Test_gparity now supports using APBC in time direction using command line toggle
This commit is contained in:
parent
81fe4c937e
commit
6121397587
@ -54,6 +54,7 @@ NAMESPACE_CHECK(BiCGSTAB);
|
||||
#include <Grid/algorithms/iterative/SchurRedBlack.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h>
|
||||
#include <Grid/algorithms/iterative/BiCGSTABMixedPrec.h>
|
||||
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
|
||||
|
@ -49,6 +49,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
Integer TotalInnerIterations; //Number of inner CG iterations
|
||||
Integer TotalOuterIterations; //Number of restarts
|
||||
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
|
||||
RealD TrueResidual;
|
||||
|
||||
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
|
||||
LinearFunction<FieldF> *guesser;
|
||||
@ -68,6 +69,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
}
|
||||
|
||||
void operator() (const FieldD &src_d_in, FieldD &sol_d){
|
||||
std::cout << GridLogMessage << "MixedPrecisionConjugateGradient: Starting mixed precision CG with outer tolerance " << Tolerance << " and inner tolerance " << InnerTolerance << std::endl;
|
||||
TotalInnerIterations = 0;
|
||||
|
||||
GridStopWatch TotalTimer;
|
||||
@ -97,6 +99,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
FieldF sol_f(SinglePrecGrid);
|
||||
sol_f.Checkerboard() = cb;
|
||||
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting initial inner CG with tolerance " << inner_tol << std::endl;
|
||||
ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations);
|
||||
CG_f.ErrorOnNoConverge = false;
|
||||
|
||||
@ -130,6 +133,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
(*guesser)(src_f, sol_f);
|
||||
|
||||
//Inner CG
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " << outer_iter << " starting inner CG with tolerance " << inner_tol << std::endl;
|
||||
CG_f.Tolerance = inner_tol;
|
||||
InnerCGtimer.Start();
|
||||
CG_f(Linop_f, src_f, sol_f);
|
||||
@ -150,6 +154,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations);
|
||||
CG_d(Linop_d, src_d_in, sol_d);
|
||||
TotalFinalStepIterations = CG_d.IterationsToComplete;
|
||||
TrueResidual = CG_d.TrueResidual;
|
||||
|
||||
TotalTimer.Stop();
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl;
|
||||
|
@ -52,7 +52,7 @@ public:
|
||||
MultiShiftFunction shifts;
|
||||
std::vector<RealD> TrueResidualShift;
|
||||
|
||||
ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) :
|
||||
ConjugateGradientMultiShift(Integer maxit, const MultiShiftFunction &_shifts) :
|
||||
MaxIterations(maxit),
|
||||
shifts(_shifts)
|
||||
{
|
||||
@ -183,6 +183,9 @@ public:
|
||||
axpby(psi[s],0.,-bs[s]*alpha[s],src,src);
|
||||
}
|
||||
|
||||
std::cout << GridLogIterative << "ConjugateGradientMultiShift: initial rn (|src|^2) =" << rn << " qq (|MdagM src|^2) =" << qq << " d ( dot(src, [MdagM + m_0]src) ) =" << d << " c=" << c << std::endl;
|
||||
|
||||
|
||||
///////////////////////////////////////
|
||||
// Timers
|
||||
///////////////////////////////////////
|
||||
|
409
Grid/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h
Normal file
409
Grid/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h
Normal file
@ -0,0 +1,409 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShift.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Christopher Kelly <ckelly@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H
|
||||
#define GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
//CK 2020: A variant of the multi-shift conjugate gradient with the matrix multiplication in single precision.
|
||||
//The residual is stored in single precision, but the search directions and solution are stored in double precision.
|
||||
//Every update_freq iterations the residual is corrected in double precision.
|
||||
|
||||
//For safety the a final regular CG is applied to clean up if necessary
|
||||
|
||||
//Linop to add shift to input linop, used in cleanup CG
|
||||
namespace ConjugateGradientMultiShiftMixedPrecSupport{
|
||||
template<typename Field>
|
||||
class ShiftedLinop: public LinearOperatorBase<Field>{
|
||||
public:
|
||||
LinearOperatorBase<Field> &linop_base;
|
||||
RealD shift;
|
||||
|
||||
ShiftedLinop(LinearOperatorBase<Field> &_linop_base, RealD _shift): linop_base(_linop_base), shift(_shift){}
|
||||
|
||||
void OpDiag (const Field &in, Field &out){ assert(0); }
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp){ assert(0); }
|
||||
void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); }
|
||||
|
||||
void Op (const Field &in, Field &out){ assert(0); }
|
||||
void AdjOp (const Field &in, Field &out){ assert(0); }
|
||||
|
||||
void HermOp(const Field &in, Field &out){
|
||||
linop_base.HermOp(in, out);
|
||||
axpy(out, shift, in, out);
|
||||
}
|
||||
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
HermOp(in,out);
|
||||
ComplexD dot = innerProduct(in,out);
|
||||
n1=real(dot);
|
||||
n2=norm2(out);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
template<class FieldD, class FieldF,
|
||||
typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
|
||||
typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
|
||||
class ConjugateGradientMultiShiftMixedPrec : public OperatorMultiFunction<FieldD>,
|
||||
public OperatorFunction<FieldD>
|
||||
{
|
||||
public:
|
||||
|
||||
using OperatorFunction<FieldD>::operator();
|
||||
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
std::vector<int> IterationsToCompleteShift; // Iterations for this shift
|
||||
int verbose;
|
||||
MultiShiftFunction shifts;
|
||||
std::vector<RealD> TrueResidualShift;
|
||||
|
||||
int ReliableUpdateFreq; //number of iterations between reliable updates
|
||||
|
||||
GridBase* SinglePrecGrid; //Grid for single-precision fields
|
||||
LinearOperatorBase<FieldF> &Linop_f; //single precision
|
||||
|
||||
ConjugateGradientMultiShiftMixedPrec(Integer maxit, const MultiShiftFunction &_shifts,
|
||||
GridBase* _SinglePrecGrid, LinearOperatorBase<FieldF> &_Linop_f,
|
||||
int _ReliableUpdateFreq
|
||||
) :
|
||||
MaxIterations(maxit), shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq)
|
||||
{
|
||||
verbose=1;
|
||||
IterationsToCompleteShift.resize(_shifts.order);
|
||||
TrueResidualShift.resize(_shifts.order);
|
||||
}
|
||||
|
||||
void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, FieldD &psi)
|
||||
{
|
||||
GridBase *grid = src.Grid();
|
||||
int nshift = shifts.order;
|
||||
std::vector<FieldD> results(nshift,grid);
|
||||
(*this)(Linop,src,results,psi);
|
||||
}
|
||||
void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, std::vector<FieldD> &results, FieldD &psi)
|
||||
{
|
||||
int nshift = shifts.order;
|
||||
|
||||
(*this)(Linop,src,results);
|
||||
|
||||
psi = shifts.norm*src;
|
||||
for(int i=0;i<nshift;i++){
|
||||
psi = psi + shifts.residues[i]*results[i];
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void operator() (LinearOperatorBase<FieldD> &Linop_d, const FieldD &src_d, std::vector<FieldD> &psi_d)
|
||||
{
|
||||
GridBase *DoublePrecGrid = src_d.Grid();
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Convenience references to the info stored in "MultiShiftFunction"
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
int nshift = shifts.order;
|
||||
|
||||
std::vector<RealD> &mass(shifts.poles); // Make references to array in "shifts"
|
||||
std::vector<RealD> &mresidual(shifts.tolerances);
|
||||
std::vector<RealD> alpha(nshift,1.0);
|
||||
|
||||
//Double precision search directions
|
||||
FieldD p_d(DoublePrecGrid);
|
||||
std::vector<FieldD> ps_d(nshift, DoublePrecGrid);// Search directions (double precision)
|
||||
|
||||
FieldD tmp_d(DoublePrecGrid);
|
||||
FieldD r_d(DoublePrecGrid);
|
||||
FieldD mmp_d(DoublePrecGrid);
|
||||
|
||||
assert(psi_d.size()==nshift);
|
||||
assert(mass.size()==nshift);
|
||||
assert(mresidual.size()==nshift);
|
||||
|
||||
// dynamic sized arrays on stack; 2d is a pain with vector
|
||||
RealD bs[nshift];
|
||||
RealD rsq[nshift];
|
||||
RealD z[nshift][2];
|
||||
int converged[nshift];
|
||||
|
||||
const int primary =0;
|
||||
|
||||
//Primary shift fields CG iteration
|
||||
RealD a,b,c,d;
|
||||
RealD cp,bp,qq; //prev
|
||||
|
||||
// Matrix mult fields
|
||||
FieldF r_f(SinglePrecGrid);
|
||||
FieldF p_f(SinglePrecGrid);
|
||||
FieldF tmp_f(SinglePrecGrid);
|
||||
FieldF mmp_f(SinglePrecGrid);
|
||||
FieldF src_f(SinglePrecGrid);
|
||||
precisionChange(src_f, src_d);
|
||||
|
||||
// Check lightest mass
|
||||
for(int s=0;s<nshift;s++){
|
||||
assert( mass[s]>= mass[primary] );
|
||||
converged[s]=0;
|
||||
}
|
||||
|
||||
// Wire guess to zero
|
||||
// Residuals "r" are src
|
||||
// First search direction "p" is also src
|
||||
cp = norm2(src_d);
|
||||
|
||||
// Handle trivial case of zero src.
|
||||
if( cp == 0. ){
|
||||
for(int s=0;s<nshift;s++){
|
||||
psi_d[s] = Zero();
|
||||
IterationsToCompleteShift[s] = 1;
|
||||
TrueResidualShift[s] = 0.;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
for(int s=0;s<nshift;s++){
|
||||
rsq[s] = cp * mresidual[s] * mresidual[s];
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: shift "<< s <<" target resid "<<rsq[s]<<std::endl;
|
||||
ps_d[s] = src_d;
|
||||
}
|
||||
// r and p for primary
|
||||
r_f=src_f; //residual maintained in single
|
||||
p_f=src_f;
|
||||
p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys
|
||||
|
||||
//MdagM+m[0]
|
||||
Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
|
||||
axpy(mmp_f,mass[0],p_f,mmp_f);
|
||||
RealD rn = norm2(p_f);
|
||||
d += rn*mass[0];
|
||||
|
||||
b = -cp /d;
|
||||
|
||||
// Set up the various shift variables
|
||||
int iz=0;
|
||||
z[0][1-iz] = 1.0;
|
||||
z[0][iz] = 1.0;
|
||||
bs[0] = b;
|
||||
for(int s=1;s<nshift;s++){
|
||||
z[s][1-iz] = 1.0;
|
||||
z[s][iz] = 1.0/( 1.0 - b*(mass[s]-mass[0]));
|
||||
bs[s] = b*z[s][iz];
|
||||
}
|
||||
|
||||
// r += b[0] A.p[0]
|
||||
// c= norm(r)
|
||||
c=axpy_norm(r_f,b,mmp_f,r_f);
|
||||
|
||||
for(int s=0;s<nshift;s++) {
|
||||
axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d);
|
||||
}
|
||||
|
||||
///////////////////////////////////////
|
||||
// Timers
|
||||
///////////////////////////////////////
|
||||
GridStopWatch AXPYTimer, ShiftTimer, QRTimer, MatrixTimer, SolverTimer, PrecChangeTimer, CleanupTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
|
||||
// Iteration loop
|
||||
int k;
|
||||
|
||||
for (k=1;k<=MaxIterations;k++){
|
||||
a = c /cp;
|
||||
|
||||
//Update double precision search direction by residual
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(r_d, r_f);
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
AXPYTimer.Start();
|
||||
axpy(p_d,a,p_d,r_d);
|
||||
|
||||
for(int s=0;s<nshift;s++){
|
||||
if ( ! converged[s] ) {
|
||||
if (s==0){
|
||||
axpy(ps_d[s],a,ps_d[s],r_d);
|
||||
} else{
|
||||
RealD as =a *z[s][iz]*bs[s] /(z[s][1-iz]*b);
|
||||
axpby(ps_d[s],z[s][iz],as,r_d,ps_d[s]);
|
||||
}
|
||||
}
|
||||
}
|
||||
AXPYTimer.Stop();
|
||||
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(p_f, p_d); //get back single prec search direction for linop
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
cp=c;
|
||||
MatrixTimer.Start();
|
||||
Linop_f.HermOp(p_f,mmp_f);
|
||||
d=real(innerProduct(p_f,mmp_f));
|
||||
MatrixTimer.Stop();
|
||||
|
||||
AXPYTimer.Start();
|
||||
axpy(mmp_f,mass[0],p_f,mmp_f);
|
||||
AXPYTimer.Stop();
|
||||
RealD rn = norm2(p_f);
|
||||
d += rn*mass[0];
|
||||
|
||||
bp=b;
|
||||
b=-cp/d;
|
||||
|
||||
// Toggle the recurrence history
|
||||
bs[0] = b;
|
||||
iz = 1-iz;
|
||||
ShiftTimer.Start();
|
||||
for(int s=1;s<nshift;s++){
|
||||
if((!converged[s])){
|
||||
RealD z0 = z[s][1-iz];
|
||||
RealD z1 = z[s][iz];
|
||||
z[s][iz] = z0*z1*bp
|
||||
/ (b*a*(z1-z0) + z1*bp*(1- (mass[s]-mass[0])*b));
|
||||
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
|
||||
}
|
||||
}
|
||||
ShiftTimer.Stop();
|
||||
|
||||
//Update double precision solutions
|
||||
AXPYTimer.Start();
|
||||
for(int s=0;s<nshift;s++){
|
||||
int ss = s;
|
||||
if( (!converged[s]) ) {
|
||||
axpy(psi_d[ss],-bs[s]*alpha[s],ps_d[s],psi_d[ss]);
|
||||
}
|
||||
}
|
||||
|
||||
//Perform reliable update if necessary; otherwise update residual from single-prec mmp
|
||||
RealD c_f = axpy_norm(r_f,b,mmp_f,r_f);
|
||||
AXPYTimer.Stop();
|
||||
|
||||
c = c_f;
|
||||
|
||||
if(k % ReliableUpdateFreq == 0){
|
||||
//Replace r with true residual
|
||||
MatrixTimer.Start();
|
||||
Linop_d.HermOp(psi_d[0],mmp_d);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
AXPYTimer.Start();
|
||||
axpy(mmp_d,mass[0],psi_d[0],mmp_d);
|
||||
|
||||
RealD c_d = axpy_norm(r_d, -1.0, mmp_d, src_d);
|
||||
AXPYTimer.Stop();
|
||||
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec k="<<k<< ", replaced |r|^2 = "<<c_f <<" with |r|^2 = "<<c_d<<std::endl;
|
||||
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(r_f, r_d);
|
||||
PrecChangeTimer.Stop();
|
||||
c = c_d;
|
||||
}
|
||||
|
||||
// Convergence checks
|
||||
int all_converged = 1;
|
||||
for(int s=0;s<nshift;s++){
|
||||
|
||||
if ( (!converged[s]) ){
|
||||
IterationsToCompleteShift[s] = k;
|
||||
|
||||
RealD css = c * z[s][iz]* z[s][iz];
|
||||
|
||||
if(css<rsq[s]){
|
||||
if ( ! converged[s] )
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
|
||||
converged[s]=1;
|
||||
} else {
|
||||
all_converged=0;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if ( all_converged ){
|
||||
|
||||
SolverTimer.Stop();
|
||||
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: All shifts have converged iteration "<<k<<std::endl;
|
||||
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: Checking solutions"<<std::endl;
|
||||
|
||||
// Check answers
|
||||
for(int s=0; s < nshift; s++) {
|
||||
Linop_d.HermOpAndNorm(psi_d[s],mmp_d,d,qq);
|
||||
axpy(tmp_d,mass[s],psi_d[s],mmp_d);
|
||||
axpy(r_d,-alpha[s],src_d,tmp_d);
|
||||
RealD rn = norm2(r_d);
|
||||
RealD cn = norm2(src_d);
|
||||
TrueResidualShift[s] = std::sqrt(rn/cn);
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: shift["<<s<<"] true residual "<< TrueResidualShift[s] << " target " << mresidual[s] << std::endl;
|
||||
|
||||
//If we have not reached the desired tolerance, do a (mixed precision) CG cleanup
|
||||
if(rn >= rsq[s]){
|
||||
CleanupTimer.Start();
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: performing cleanup step for shift " << s << std::endl;
|
||||
|
||||
//Setup linear operators for final cleanup
|
||||
ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldD> Linop_shift_d(Linop_d, mass[s]);
|
||||
ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldF> Linop_shift_f(Linop_f, mass[s]);
|
||||
|
||||
MixedPrecisionConjugateGradient<FieldD,FieldF> cg(mresidual[s], MaxIterations, MaxIterations, SinglePrecGrid, Linop_shift_f, Linop_shift_d);
|
||||
cg(src_d, psi_d[s]);
|
||||
|
||||
TrueResidualShift[s] = cg.TrueResidual;
|
||||
CleanupTimer.Stop();
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradientMultiShiftMixedPrec: Time Breakdown for body"<<std::endl;
|
||||
std::cout << GridLogMessage << "\tSolver " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\t\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\t\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\t\tShift " << ShiftTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\t\tPrecision Change " << PrecChangeTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tFinal Cleanup " << CleanupTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tSolver+Cleanup " << SolverTimer.Elapsed() + CleanupTimer.Elapsed() << std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
// ugly hack
|
||||
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
|
||||
// assert(0);
|
||||
}
|
||||
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -44,6 +44,7 @@ public:
|
||||
int, MinRes); // Must restart
|
||||
};
|
||||
|
||||
//This class is the input parameter class for some testing programs
|
||||
struct LocalCoherenceLanczosParams : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
|
||||
@ -155,6 +156,7 @@ public:
|
||||
_coarse_relax_tol(coarse_relax_tol)
|
||||
{ };
|
||||
|
||||
//evalMaxApprox: approximation of largest eval of the fine Chebyshev operator (suitably wrapped by block projection)
|
||||
int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
CoarseField v(B);
|
||||
@ -181,8 +183,16 @@ public:
|
||||
if( (vv<eresid*eresid) ) conv = 1;
|
||||
return conv;
|
||||
}
|
||||
|
||||
//This function is called at the end of the coarse grid Lanczos. It promotes the coarse eigenvector 'B' to the fine grid,
|
||||
//applies a smoother to the result then computes the computes the *fine grid* eigenvalue (output as 'eval').
|
||||
|
||||
//evalMaxApprox should be the approximation of the largest eval of the fine Hermop. However when this function is called by IRL it actually passes the largest eval of the *Chebyshev* operator (as this is the max approx used for the TestConvergence above)
|
||||
//As the largest eval of the Chebyshev is typically several orders of magnitude larger this makes the convergence test pass even when it should not.
|
||||
//We therefore ignore evalMaxApprox here and use a value of 1.0 (note this value is already used by TestCoarse)
|
||||
int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
evalMaxApprox = 1.0; //cf above
|
||||
GridBase *FineGrid = _subspace[0].Grid();
|
||||
int checkerboard = _subspace[0].Checkerboard();
|
||||
FineField fB(FineGrid);fB.Checkerboard() =checkerboard;
|
||||
@ -201,13 +211,13 @@ public:
|
||||
eval = vnum/vden;
|
||||
fv -= eval*fB;
|
||||
RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0);
|
||||
if ( j > nbasis ) eresid = eresid*_coarse_relax_tol;
|
||||
|
||||
std::cout.precision(13);
|
||||
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
|
||||
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
|
||||
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
|
||||
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv << " target " << eresid*eresid
|
||||
<<std::endl;
|
||||
if ( j > nbasis ) eresid = eresid*_coarse_relax_tol;
|
||||
if( (vv<eresid*eresid) ) return 1;
|
||||
return 0;
|
||||
}
|
||||
@ -285,6 +295,10 @@ public:
|
||||
evals_coarse.resize(0);
|
||||
};
|
||||
|
||||
//The block inner product is the inner product on the fine grid locally summed over the blocks
|
||||
//to give a Lattice<Scalar> on the coarse grid. This function orthnormalizes the fine-grid subspace
|
||||
//vectors under the block inner product. This step must be performed after computing the fine grid
|
||||
//eigenvectors and before computing the coarse grid eigenvectors.
|
||||
void Orthogonalise(void ) {
|
||||
CoarseScalar InnerProd(_CoarseGrid);
|
||||
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
|
||||
@ -328,6 +342,8 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
//While this method serves to check the coarse eigenvectors, it also recomputes the eigenvalues from the smoothed reconstructed eigenvectors
|
||||
//hence the smoother can be tuned after running the coarse Lanczos by using a different smoother here
|
||||
void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)
|
||||
{
|
||||
assert(evals_fine.size() == nbasis);
|
||||
@ -376,18 +392,23 @@ public:
|
||||
evals_fine.resize(nbasis);
|
||||
subspace.resize(nbasis,_FineGrid);
|
||||
}
|
||||
|
||||
|
||||
//cheby_op: Parameters of the fine grid Chebyshev polynomial used for the Lanczos acceleration
|
||||
//cheby_smooth: Parameters of a separate Chebyshev polynomial used after the Lanczos has completed to smooth out high frequency noise in the reconstructed fine grid eigenvectors prior to computing the eigenvalue
|
||||
//relax: Reconstructed eigenvectors (post smoothing) are naturally not as precise as true eigenvectors. This factor acts as a multiplier on the stopping condition when determining whether the results satisfy the user provided stopping condition
|
||||
void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
|
||||
int Nstop, int Nk, int Nm,RealD resid,
|
||||
RealD MaxIt, RealD betastp, int MinRes)
|
||||
{
|
||||
Chebyshev<FineField> Cheby(cheby_op);
|
||||
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,subspace);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace);
|
||||
Chebyshev<FineField> Cheby(cheby_op); //Chebyshev of fine operator on fine grid
|
||||
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,subspace); //Fine operator on coarse grid with intermediate fine grid conversion
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace); //Chebyshev of fine operator on coarse grid with intermediate fine grid conversion
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Chebyshev<FineField> ChebySmooth(cheby_smooth);
|
||||
Chebyshev<FineField> ChebySmooth(cheby_smooth); //lower order Chebyshev of fine operator on fine grid used to smooth regenerated eigenvectors
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
|
||||
|
||||
evals_coarse.resize(Nm);
|
||||
@ -395,6 +416,7 @@ public:
|
||||
|
||||
CoarseField src(_CoarseGrid); src=1.0;
|
||||
|
||||
//Note the "tester" here is also responsible for generating the fine grid eigenvalues which are output into the "evals_coarse" array
|
||||
ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
|
||||
int Nconv=0;
|
||||
IRL.calc(evals_coarse,evec_coarse,src,Nconv,false);
|
||||
@ -405,6 +427,14 @@ public:
|
||||
std::cout << i << " Coarse eval = " << evals_coarse[i] << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
//Get the fine eigenvector 'i' by reconstruction
|
||||
void getFineEvecEval(FineField &evec, RealD &eval, const int i) const{
|
||||
blockPromote(evec_coarse[i],evec,subspace);
|
||||
eval = evals_coarse[i];
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -30,6 +30,8 @@ template<class Field> class PowerMethod
|
||||
RealD vden = norm2(src_n);
|
||||
RealD na = vnum/vden;
|
||||
|
||||
std::cout << GridLogIterative << "PowerMethod: Current approximation of largest eigenvalue " << na << std::endl;
|
||||
|
||||
if ( (fabs(evalMaxApprox/na - 1.0) < 0.001) || (i==_MAX_ITER_EST_-1) ) {
|
||||
evalMaxApprox = na;
|
||||
std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
|
||||
|
@ -39,9 +39,11 @@ using namespace Grid;
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
class NerscIO : public BinaryIO {
|
||||
public:
|
||||
|
||||
typedef Lattice<vLorentzColourMatrixD> GaugeField;
|
||||
|
||||
// Enable/disable exiting if the plaquette in the header does not match the value computed (default true)
|
||||
static bool & exitOnReadPlaquetteMismatch(){ static bool v=true; return v; }
|
||||
|
||||
static inline void truncate(std::string file){
|
||||
std::ofstream fout(file,std::ios::out);
|
||||
}
|
||||
@ -198,7 +200,7 @@ public:
|
||||
std::cerr << " nersc_csum " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl;
|
||||
exit(0);
|
||||
}
|
||||
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
|
||||
if(exitOnReadPlaquetteMismatch()) assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
|
||||
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
|
||||
assert(nersc_csum == header.checksum );
|
||||
|
||||
|
@ -30,6 +30,18 @@ directory
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/*
|
||||
Policy implementation for G-parity boundary conditions
|
||||
|
||||
Rather than treating the gauge field as a flavored field, the Grid implementation of G-parity treats the gauge field as a regular
|
||||
field with complex conjugate boundary conditions. In order to ensure the second flavor interacts with the conjugate links and the first
|
||||
with the regular links we overload the functionality of doubleStore, whose purpose is to store the gauge field and the barrel-shifted gauge field
|
||||
to avoid communicating links when applying the Dirac operator, such that the double-stored field contains also a flavor index which maps to
|
||||
either the link or the conjugate link. This flavored field is then used by multLink to apply the correct link to a spinor.
|
||||
|
||||
Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.
|
||||
mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
|
||||
*/
|
||||
template <class S, class Representation = FundamentalRepresentation, class Options=CoeffReal>
|
||||
class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Representation::Dimension> > {
|
||||
public:
|
||||
@ -113,7 +125,7 @@ public:
|
||||
|| ((distance== 1)&&(icoor[direction]==1))
|
||||
|| ((distance==-1)&&(icoor[direction]==0));
|
||||
|
||||
permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu]; //only if we are going around the world
|
||||
permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu] && mmu < Nd-1; //only if we are going around the world in a spatial direction
|
||||
|
||||
//Apply the links
|
||||
int f_upper = permute_lane ? 1 : 0;
|
||||
@ -139,10 +151,10 @@ public:
|
||||
assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code
|
||||
assert((sl == 1) || (sl == 2));
|
||||
|
||||
if ( SE->_around_the_world && St.parameters.twists[mmu] ) {
|
||||
|
||||
//If this site is an global boundary site, perform the G-parity flavor twist
|
||||
if ( mmu < Nd-1 && SE->_around_the_world && St.parameters.twists[mmu] ) {
|
||||
if ( sl == 2 ) {
|
||||
|
||||
//Only do the twist for lanes on the edge of the physical node
|
||||
ExtractBuffer<sobj> vals(Nsimd);
|
||||
|
||||
extract(chi,vals);
|
||||
@ -197,6 +209,19 @@ public:
|
||||
reg = memory;
|
||||
}
|
||||
|
||||
|
||||
//Poke 'poke_f0' onto flavor 0 and 'poke_f1' onto flavor 1 in direction mu of the doubled gauge field Uds
|
||||
inline void pokeGparityDoubledGaugeField(DoubledGaugeField &Uds, const GaugeLinkField &poke_f0, const GaugeLinkField &poke_f1, const int mu){
|
||||
autoView(poke_f0_v, poke_f0, CpuRead);
|
||||
autoView(poke_f1_v, poke_f1, CpuRead);
|
||||
autoView(Uds_v, Uds, CpuWrite);
|
||||
thread_foreach(ss,poke_f0_v,{
|
||||
Uds_v[ss](0)(mu) = poke_f0_v[ss]();
|
||||
Uds_v[ss](1)(mu) = poke_f1_v[ss]();
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||
{
|
||||
conformable(Uds.Grid(),GaugeGrid);
|
||||
@ -208,13 +233,18 @@ public:
|
||||
|
||||
Lattice<iScalar<vInteger> > coor(GaugeGrid);
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
//Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.
|
||||
//mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
|
||||
for(int mu=0;mu<Nd-1;mu++){
|
||||
|
||||
if( Params.twists[mu] ){
|
||||
LatticeCoordinate(coor,mu);
|
||||
}
|
||||
|
||||
U = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
Uconj = conjugate(U);
|
||||
|
||||
// Implement the isospin rotation sign on the boundary between f=1 and f=0
|
||||
// This phase could come from a simple bc 1,1,-1,1 ..
|
||||
int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
|
||||
if ( Params.twists[mu] ) {
|
||||
@ -260,6 +290,38 @@ public:
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
{ //periodic / antiperiodic temporal BCs
|
||||
int mu = Nd-1;
|
||||
int L = GaugeGrid->GlobalDimensions()[mu];
|
||||
int Lmu = L - 1;
|
||||
|
||||
LatticeCoordinate(coor, mu);
|
||||
|
||||
U = PeekIndex<LorentzIndex>(Umu, mu); //Get t-directed links
|
||||
|
||||
GaugeLinkField *Upoke = &U;
|
||||
|
||||
if(Params.twists[mu]){ //antiperiodic
|
||||
Utmp = where(coor == Lmu, -U, U);
|
||||
Upoke = &Utmp;
|
||||
}
|
||||
|
||||
Uconj = conjugate(*Upoke); //second flavor interacts with conjugate links
|
||||
pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu);
|
||||
|
||||
//Get the barrel-shifted field
|
||||
Utmp = adj(Cshift(U, mu, -1)); //is a forward shift!
|
||||
Upoke = &Utmp;
|
||||
|
||||
if(Params.twists[mu]){
|
||||
U = where(coor == 0, -Utmp, Utmp); //boundary phase
|
||||
Upoke = &U;
|
||||
}
|
||||
|
||||
Uconj = conjugate(*Upoke);
|
||||
pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu + 4);
|
||||
}
|
||||
}
|
||||
|
||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) {
|
||||
@ -300,27 +362,47 @@ public:
|
||||
}
|
||||
|
||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) {
|
||||
|
||||
int Ls=Btilde.Grid()->_fdimensions[0];
|
||||
|
||||
GaugeLinkField tmp(mat.Grid());
|
||||
tmp = Zero();
|
||||
{
|
||||
autoView( tmp_v , tmp, CpuWrite);
|
||||
autoView( Atilde_v , Atilde, CpuRead);
|
||||
autoView( Btilde_v , Btilde, CpuRead);
|
||||
thread_for(ss,tmp.Grid()->oSites(),{
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
int sF = s + Ls * ss;
|
||||
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde_v[sF], Atilde_v[sF]));
|
||||
tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
|
||||
GridBase *GaugeGrid = mat.Grid();
|
||||
Lattice<iScalar<vInteger> > coor(GaugeGrid);
|
||||
|
||||
if( Params.twists[mu] ){
|
||||
LatticeCoordinate(coor,mu);
|
||||
}
|
||||
|
||||
autoView( mat_v , mat, AcceleratorWrite);
|
||||
autoView( Btilde_v , Btilde, AcceleratorRead);
|
||||
autoView( Atilde_v , Atilde, AcceleratorRead);
|
||||
accelerator_for(sss,mat.Grid()->oSites(), FermionField::vector_type::Nsimd(),{
|
||||
int sU=sss;
|
||||
typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType;
|
||||
ColorMatrixType sum;
|
||||
zeroit(sum);
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sF = s+Ls*sU;
|
||||
for(int spn=0;spn<Ns;spn++){ //sum over spin
|
||||
//Flavor 0
|
||||
auto bb = coalescedRead(Btilde_v[sF](0)(spn) ); //color vector
|
||||
auto aa = coalescedRead(Atilde_v[sF](0)(spn) );
|
||||
sum = sum + outerProduct(bb,aa);
|
||||
|
||||
//Flavor 1
|
||||
bb = coalescedRead(Btilde_v[sF](1)(spn) );
|
||||
aa = coalescedRead(Atilde_v[sF](1)(spn) );
|
||||
sum = sum + conjugate(outerProduct(bb,aa));
|
||||
}
|
||||
}
|
||||
coalescedWrite(mat_v[sU](mu)(), sum);
|
||||
});
|
||||
}
|
||||
PokeIndex<LorentzIndex>(mat, tmp, mu);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffReal> GparityWilsonImplR; // Real.. whichever prec
|
||||
|
@ -55,6 +55,7 @@ static_assert(same_vComplex == 1, "Dirac Operators must have same underlying SIM
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
int nu = 0;
|
||||
int tbc_aprd = 0; //use antiperiodic BCs in the time direction?
|
||||
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
@ -62,6 +63,9 @@ int main (int argc, char ** argv)
|
||||
if(std::string(argv[i]) == "--Gparity-dir"){
|
||||
std::stringstream ss; ss << argv[i+1]; ss >> nu;
|
||||
std::cout << GridLogMessage << "Set Gparity direction to " << nu << std::endl;
|
||||
}else if(std::string(argv[i]) == "--Tbc-APRD"){
|
||||
tbc_aprd = 1;
|
||||
std::cout << GridLogMessage << "Using antiperiodic BCs in the time direction" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
@ -155,13 +159,18 @@ int main (int argc, char ** argv)
|
||||
|
||||
//Coordinate grid for reference
|
||||
LatticeInteger xcoor_1f5(FGrid_1f);
|
||||
LatticeCoordinate(xcoor_1f5,1+nu);
|
||||
LatticeCoordinate(xcoor_1f5,1+nu); //note '1+nu'! This is because for 5D fields the s-direction is direction 0
|
||||
Replicate(src,src_1f);
|
||||
src_1f = where( xcoor_1f5 >= Integer(L), 2.0*src_1f,src_1f );
|
||||
|
||||
RealD mass=0.0;
|
||||
RealD M5=1.8;
|
||||
StandardDiracOp Ddwf(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f,mass,M5 DOP_PARAMS);
|
||||
|
||||
//Standard Dirac op
|
||||
AcceleratorVector<Complex,4> bc_std(Nd, 1.0);
|
||||
if(tbc_aprd) bc_std[Nd-1] = -1.; //antiperiodic time BC
|
||||
StandardDiracOp::ImplParams std_params(bc_std);
|
||||
StandardDiracOp Ddwf(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f,mass,M5 DOP_PARAMS, std_params);
|
||||
|
||||
StandardFermionField src_o_1f(FrbGrid_1f);
|
||||
StandardFermionField result_o_1f(FrbGrid_1f);
|
||||
@ -172,9 +181,11 @@ int main (int argc, char ** argv)
|
||||
ConjugateGradient<StandardFermionField> CG(1.0e-8,10000);
|
||||
CG(HermOpEO,src_o_1f,result_o_1f);
|
||||
|
||||
// const int nu = 3;
|
||||
//Gparity Dirac op
|
||||
std::vector<int> twists(Nd,0);
|
||||
twists[nu] = 1;
|
||||
if(tbc_aprd) twists[Nd-1] = 1;
|
||||
|
||||
GparityDiracOp::ImplParams params;
|
||||
params.twists = twists;
|
||||
GparityDiracOp GPDdwf(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f,*UrbGrid_2f,mass,M5 DOP_PARAMS,params);
|
||||
@ -271,8 +282,11 @@ int main (int argc, char ** argv)
|
||||
std::cout << "2f cb "<<result_o_2f.Checkerboard()<<std::endl;
|
||||
std::cout << "1f cb "<<result_o_1f.Checkerboard()<<std::endl;
|
||||
|
||||
std::cout << " result norms " <<norm2(result_o_2f)<<" " <<norm2(result_o_1f)<<std::endl;
|
||||
//Compare norms
|
||||
std::cout << " result norms 2f: " <<norm2(result_o_2f)<<" 1f: " <<norm2(result_o_1f)<<std::endl;
|
||||
|
||||
|
||||
//Take the 2f solution and convert into the corresponding 1f solution (odd cb only)
|
||||
StandardFermionField res0o (FrbGrid_2f);
|
||||
StandardFermionField res1o (FrbGrid_2f);
|
||||
StandardFermionField res0 (FGrid_2f);
|
||||
@ -281,12 +295,13 @@ int main (int argc, char ** argv)
|
||||
res0=Zero();
|
||||
res1=Zero();
|
||||
|
||||
res0o = PeekIndex<0>(result_o_2f,0);
|
||||
res1o = PeekIndex<0>(result_o_2f,1);
|
||||
res0o = PeekIndex<0>(result_o_2f,0); //flavor 0, odd cb
|
||||
res1o = PeekIndex<0>(result_o_2f,1); //flavor 1, odd cb
|
||||
|
||||
std::cout << "res cb "<<res0o.Checkerboard()<<std::endl;
|
||||
std::cout << "res cb "<<res1o.Checkerboard()<<std::endl;
|
||||
|
||||
//poke odd onto non-cb field
|
||||
setCheckerboard(res0,res0o);
|
||||
setCheckerboard(res1,res1o);
|
||||
|
||||
@ -296,12 +311,13 @@ int main (int argc, char ** argv)
|
||||
Replicate(res0,replica0);
|
||||
Replicate(res1,replica1);
|
||||
|
||||
//2nd half of doubled lattice has f=1
|
||||
replica = where( xcoor_1f5 >= Integer(L), replica1,replica0 );
|
||||
|
||||
replica0 = Zero();
|
||||
setCheckerboard(replica0,result_o_1f);
|
||||
|
||||
std::cout << "Norm2 solutions is " <<norm2(replica)<<" "<< norm2(replica0)<<std::endl;
|
||||
std::cout << "Norm2 solutions 1f reconstructed from 2f: " <<norm2(replica)<<" Actual 1f: "<< norm2(replica0)<<std::endl;
|
||||
|
||||
replica = replica - replica0;
|
||||
|
||||
|
@ -71,26 +71,14 @@ int main (int argc, char ** argv)
|
||||
////////////////////////////////////
|
||||
RealD mass=0.2; //kills the diagonal term
|
||||
RealD M5=1.8;
|
||||
// const int nu = 3;
|
||||
// std::vector<int> twists(Nd,0); // twists[nu] = 1;
|
||||
// GparityDomainWallFermionR::ImplParams params; params.twists = twists;
|
||||
// GparityDomainWallFermionR Ddwf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
|
||||
|
||||
// DomainWallFermionR Dw (U, Grid,RBGrid,mass,M5);
|
||||
|
||||
const int nu = 3;
|
||||
const int nu = 0; //gparity direction
|
||||
std::vector<int> twists(Nd,0);
|
||||
twists[nu] = 1;
|
||||
twists[Nd-1] = 1; //antiperiodic in time
|
||||
GparityDomainWallFermionR::ImplParams params;
|
||||
params.twists = twists;
|
||||
|
||||
/*
|
||||
params.boundary_phases[0] = 1.0;
|
||||
params.boundary_phases[1] = 1.0;
|
||||
params.boundary_phases[2] = 1.0;
|
||||
params.boundary_phases[3] =- 1.0;
|
||||
*/
|
||||
|
||||
GparityDomainWallFermionR Dw(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
|
||||
|
||||
Dw.M (phi,Mphi);
|
||||
|
@ -71,8 +71,10 @@ int main (int argc, char ** argv)
|
||||
RealD mass=0.01;
|
||||
RealD M5=1.8;
|
||||
|
||||
const int nu = 3;
|
||||
std::vector<int> twists(Nd,0); twists[nu] = 1;
|
||||
const int nu = 1;
|
||||
std::vector<int> twists(Nd,0);
|
||||
twists[nu] = 1;
|
||||
twists[3] = 1;
|
||||
GparityDomainWallFermionR::ImplParams params; params.twists = twists;
|
||||
GparityDomainWallFermionR Ddwf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
|
||||
Ddwf.M (phi,Mphi);
|
||||
|
@ -64,8 +64,12 @@ int main (int argc, char ** argv)
|
||||
////////////////////////////////////
|
||||
RealD mass=0.01;
|
||||
|
||||
const int nu = 3;
|
||||
std::vector<int> twists(Nd,0); twists[nu] = 1;
|
||||
const int nu = 1;
|
||||
const int Lnu=latt_size[nu];
|
||||
|
||||
std::vector<int> twists(Nd,0);
|
||||
twists[nu] = 1;
|
||||
twists[3]=1;
|
||||
GparityWilsonFermionR::ImplParams params; params.twists = twists;
|
||||
GparityWilsonFermionR Wil(U,*UGrid,*UrbGrid,mass,params);
|
||||
Wil.M (phi,Mphi);
|
||||
|
@ -31,14 +31,38 @@ using namespace std;
|
||||
using namespace Grid;
|
||||
;
|
||||
|
||||
typedef typename GparityDomainWallFermionR::FermionField FermionField;
|
||||
template<typename Action>
|
||||
struct Setup{};
|
||||
|
||||
RealD AllZero(RealD x){ return 0.;}
|
||||
template<>
|
||||
struct Setup<GparityMobiusFermionR>{
|
||||
static GparityMobiusFermionR* getAction(LatticeGaugeField &Umu,
|
||||
GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){
|
||||
RealD mass=0.01;
|
||||
RealD M5=1.8;
|
||||
RealD mob_b=1.5;
|
||||
GparityMobiusFermionD ::ImplParams params;
|
||||
std::vector<int> twists({1,1,1,0});
|
||||
params.twists = twists;
|
||||
return new GparityMobiusFermionR(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params);
|
||||
}
|
||||
};
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
template<>
|
||||
struct Setup<DomainWallFermionR>{
|
||||
static DomainWallFermionR* getAction(LatticeGaugeField &Umu,
|
||||
GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){
|
||||
RealD mass=0.01;
|
||||
RealD M5=1.8;
|
||||
return new DomainWallFermionR(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
template<typename Action>
|
||||
void run(){
|
||||
typedef typename Action::FermionField FermionField;
|
||||
const int Ls=8;
|
||||
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||
@ -56,24 +80,10 @@ int main (int argc, char ** argv)
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
}
|
||||
Action *action = Setup<Action>::getAction(Umu,FGrid,FrbGrid,UGrid,UrbGrid);
|
||||
|
||||
RealD mass=0.01;
|
||||
RealD M5=1.8;
|
||||
RealD mob_b=1.5;
|
||||
// DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
GparityMobiusFermionD ::ImplParams params;
|
||||
std::vector<int> twists({1,1,1,0});
|
||||
params.twists = twists;
|
||||
GparityMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params);
|
||||
|
||||
// MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
|
||||
// SchurDiagTwoOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
|
||||
SchurDiagTwoOperator<GparityMobiusFermionR,FermionField> HermOp(Ddwf);
|
||||
// SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
|
||||
//MdagMLinearOperator<Action,FermionField> HermOp(Ddwf);
|
||||
SchurDiagTwoOperator<Action,FermionField> HermOp(*action);
|
||||
|
||||
const int Nstop = 30;
|
||||
const int Nk = 40;
|
||||
@ -91,7 +101,6 @@ int main (int argc, char ** argv)
|
||||
|
||||
ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt);
|
||||
|
||||
|
||||
std::vector<RealD> eval(Nm);
|
||||
FermionField src(FrbGrid);
|
||||
gaussian(RNG5rb,src);
|
||||
@ -103,6 +112,28 @@ int main (int argc, char ** argv)
|
||||
int Nconv;
|
||||
IRL.calc(eval,evec,src,Nconv);
|
||||
|
||||
delete action;
|
||||
}
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::string action = "GparityMobius";
|
||||
for(int i=1;i<argc;i++){
|
||||
if(std::string(argv[i]) == "-action"){
|
||||
action = argv[i+1];
|
||||
}
|
||||
}
|
||||
|
||||
if(action == "GparityMobius"){
|
||||
run<GparityMobiusFermionR>();
|
||||
}else if(action == "DWF"){
|
||||
run<DomainWallFermionR>();
|
||||
}else{
|
||||
std::cout << "Unknown action" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
Grid_finalize();
|
||||
}
|
||||
|
184
tests/solver/Test_dwf_multishift_mixedprec.cc
Normal file
184
tests/solver/Test_dwf_multishift_mixedprec.cc
Normal file
@ -0,0 +1,184 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/Test_dwf_multishift_mixedprec.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christopher Kelly <ckelly@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
using namespace Grid;
|
||||
|
||||
template<typename SpeciesD, typename SpeciesF, typename GaugeStatisticsType>
|
||||
void run_test(int argc, char ** argv, const typename SpeciesD::ImplParams ¶ms){
|
||||
const int Ls = 16;
|
||||
GridCartesian* UGrid_d = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexD::Nsimd()), GridDefaultMpi());
|
||||
GridRedBlackCartesian* UrbGrid_d = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_d);
|
||||
GridCartesian* FGrid_d = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid_d);
|
||||
GridRedBlackCartesian* FrbGrid_d = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid_d);
|
||||
|
||||
GridCartesian* UGrid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
|
||||
GridRedBlackCartesian* UrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_f);
|
||||
GridCartesian* FGrid_f = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid_f);
|
||||
GridRedBlackCartesian* FrbGrid_f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid_f);
|
||||
|
||||
typedef typename SpeciesD::FermionField FermionFieldD;
|
||||
typedef typename SpeciesF::FermionField FermionFieldF;
|
||||
|
||||
std::vector<int> seeds4({1, 2, 3, 4});
|
||||
std::vector<int> seeds5({5, 6, 7, 8});
|
||||
GridParallelRNG RNG5(FGrid_d);
|
||||
RNG5.SeedFixedIntegers(seeds5);
|
||||
GridParallelRNG RNG4(UGrid_d);
|
||||
RNG4.SeedFixedIntegers(seeds4);
|
||||
|
||||
FermionFieldD src_d(FGrid_d);
|
||||
random(RNG5, src_d);
|
||||
|
||||
LatticeGaugeFieldD Umu_d(UGrid_d);
|
||||
|
||||
//CPS-created G-parity ensembles have a factor of 2 error in the plaquette that causes the read to fail unless we workaround it
|
||||
bool gparity_plaquette_fix = false;
|
||||
for(int i=1;i<argc;i++){
|
||||
if(std::string(argv[i]) == "--gparity_plaquette_fix"){
|
||||
gparity_plaquette_fix=true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool cfg_loaded=false;
|
||||
for(int i=1;i<argc;i++){
|
||||
if(std::string(argv[i]) == "--load_config"){
|
||||
assert(i != argc-1);
|
||||
std::string file = argv[i+1];
|
||||
NerscIO io;
|
||||
FieldMetaData metadata;
|
||||
|
||||
if(gparity_plaquette_fix) NerscIO::exitOnReadPlaquetteMismatch() = false;
|
||||
|
||||
io.readConfiguration<GaugeStatisticsType>(Umu_d, metadata, file);
|
||||
|
||||
if(gparity_plaquette_fix){
|
||||
metadata.plaquette *= 2.; //correct header value
|
||||
|
||||
//Get the true plaquette
|
||||
FieldMetaData tmp;
|
||||
GaugeStatisticsType gs; gs(Umu_d, tmp);
|
||||
|
||||
std::cout << "After correction: plaqs " << tmp.plaquette << " " << metadata.plaquette << std::endl;
|
||||
assert(fabs(tmp.plaquette -metadata.plaquette ) < 1.0e-5 );
|
||||
}
|
||||
|
||||
cfg_loaded=true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(!cfg_loaded)
|
||||
SU<Nc>::HotConfiguration(RNG4, Umu_d);
|
||||
|
||||
LatticeGaugeFieldF Umu_f(UGrid_f);
|
||||
precisionChange(Umu_f, Umu_d);
|
||||
|
||||
std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl;
|
||||
|
||||
RealD mass = 0.01;
|
||||
RealD M5 = 1.8;
|
||||
SpeciesD Ddwf_d(Umu_d, *FGrid_d, *FrbGrid_d, *UGrid_d, *UrbGrid_d, mass, M5, params);
|
||||
SpeciesF Ddwf_f(Umu_f, *FGrid_f, *FrbGrid_f, *UGrid_f, *UrbGrid_f, mass, M5, params);
|
||||
|
||||
FermionFieldD src_o_d(FrbGrid_d);
|
||||
pickCheckerboard(Odd, src_o_d, src_d);
|
||||
|
||||
SchurDiagMooeeOperator<SpeciesD, FermionFieldD> HermOpEO_d(Ddwf_d);
|
||||
SchurDiagMooeeOperator<SpeciesF, FermionFieldF> HermOpEO_f(Ddwf_f);
|
||||
|
||||
AlgRemez remez(1e-4, 64, 50);
|
||||
int order = 15;
|
||||
remez.generateApprox(order, 1, 2); //sqrt
|
||||
|
||||
MultiShiftFunction shifts(remez, 1e-10, false);
|
||||
|
||||
int relup_freq = 50;
|
||||
double t1=usecond();
|
||||
ConjugateGradientMultiShiftMixedPrec<FermionFieldD,FermionFieldF> mcg(10000, shifts, FrbGrid_f, HermOpEO_f, relup_freq);
|
||||
|
||||
std::vector<FermionFieldD> results_o_d(order, FrbGrid_d);
|
||||
mcg(HermOpEO_d, src_o_d, results_o_d);
|
||||
double t2=usecond();
|
||||
|
||||
//Crosscheck double and mixed prec results
|
||||
ConjugateGradientMultiShift<FermionFieldD> dmcg(10000, shifts);
|
||||
std::vector<FermionFieldD> results_o_d_2(order, FrbGrid_d);
|
||||
dmcg(HermOpEO_d, src_o_d, results_o_d_2);
|
||||
double t3=usecond();
|
||||
|
||||
std::cout << GridLogMessage << "Comparison of mixed prec results to double prec results |mixed - double|^2 :" << std::endl;
|
||||
FermionFieldD tmp(FrbGrid_d);
|
||||
for(int i=0;i<order;i++){
|
||||
RealD ndiff = axpy_norm(tmp, -1., results_o_d[i], results_o_d_2[i]);
|
||||
std::cout << i << " " << ndiff << std::endl;
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage << "Mixed precision algorithm: Total usec = "<< (t2-t1)<<std::endl;
|
||||
std::cout<<GridLogMessage << "Double precision algorithm: Total usec = "<< (t3-t2)<<std::endl;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc, &argv);
|
||||
|
||||
bool gparity = false;
|
||||
int gpdir;
|
||||
|
||||
for(int i=1;i<argc;i++){
|
||||
std::string arg(argv[i]);
|
||||
if(arg == "--Gparity"){
|
||||
assert(i!=argc-1);
|
||||
gpdir = std::stoi(argv[i+1]);
|
||||
assert(gpdir >= 0 && gpdir <= 2); //spatial!
|
||||
gparity = true;
|
||||
}
|
||||
}
|
||||
if(gparity){
|
||||
std::cout << "Running test with G-parity BCs in " << gpdir << " direction" << std::endl;
|
||||
GparityWilsonImplParams params;
|
||||
params.twists[gpdir] = 1;
|
||||
|
||||
std::vector<int> conj_dirs(Nd,0);
|
||||
conj_dirs[gpdir] = 1;
|
||||
ConjugateGimplD::setDirections(conj_dirs);
|
||||
|
||||
run_test<GparityDomainWallFermionD, GparityDomainWallFermionF, ConjugateGaugeStatistics>(argc,argv,params);
|
||||
}else{
|
||||
std::cout << "Running test with periodic BCs" << std::endl;
|
||||
WilsonImplParams params;
|
||||
run_test<DomainWallFermionD, DomainWallFermionF, PeriodicGaugeStatistics>(argc,argv,params);
|
||||
}
|
||||
|
||||
Grid_finalize();
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user