1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-10-25 02:04:48 +01:00

Compare commits

...

3 Commits

Author SHA1 Message Date
Chulwoo Jung
1e3fb32572 Checking in working version of Lanczos. 2017-03-21 16:45:33 -04:00
Chulwoo Jung
0d5af667d8 Works with CPS evolution 2017-03-21 12:40:43 -04:00
Chulwoo Jung
e9712bc7fb Zmobius test was wrong! (only mobius) checking in again 2017-03-16 23:04:28 -04:00
9 changed files with 778 additions and 108 deletions

View File

@@ -39,6 +39,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/approx/MultiShiftFunction.h> #include <Grid/algorithms/approx/MultiShiftFunction.h>
#include <Grid/algorithms/iterative/ConjugateGradient.h> #include <Grid/algorithms/iterative/ConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateGradientShifted.h>
#include <Grid/algorithms/iterative/ConjugateResidual.h> #include <Grid/algorithms/iterative/ConjugateResidual.h>
#include <Grid/algorithms/iterative/NormalEquations.h> #include <Grid/algorithms/iterative/NormalEquations.h>
#include <Grid/algorithms/iterative/SchurRedBlack.h> #include <Grid/algorithms/iterative/SchurRedBlack.h>

View File

@@ -45,8 +45,6 @@ class ConjugateGradient : public OperatorFunction<Field> {
// Defaults true. // Defaults true.
RealD Tolerance; RealD Tolerance;
Integer MaxIterations; Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
: Tolerance(tol), : Tolerance(tol),
MaxIterations(maxit), MaxIterations(maxit),
@@ -157,14 +155,13 @@ class ConjugateGradient : public OperatorFunction<Field> {
std::cout << std::endl; std::cout << std::endl;
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
IterationsToComplete = k;
return; return;
} }
} }
std::cout << GridLogMessage << "ConjugateGradient did NOT converge" std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
<< std::endl; << std::endl;
if (ErrorOnNoConverge) assert(0); if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
} }
}; };
} }

View File

@@ -35,7 +35,6 @@ namespace Grid {
class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> { class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
public: public:
RealD Tolerance; RealD Tolerance;
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
Integer MaxInnerIterations; Integer MaxInnerIterations;
Integer MaxOuterIterations; Integer MaxOuterIterations;
GridBase* SinglePrecGrid; //Grid for single-precision fields GridBase* SinglePrecGrid; //Grid for single-precision fields
@@ -43,16 +42,12 @@ namespace Grid {
LinearOperatorBase<FieldF> &Linop_f; LinearOperatorBase<FieldF> &Linop_f;
LinearOperatorBase<FieldD> &Linop_d; LinearOperatorBase<FieldD> &Linop_d;
Integer TotalInnerIterations; //Number of inner CG iterations
Integer TotalOuterIterations; //Number of restarts
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
LinearFunction<FieldF> *guesser; LinearFunction<FieldF> *guesser;
MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) : MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) :
Linop_f(_Linop_f), Linop_d(_Linop_d), Linop_f(_Linop_f), Linop_d(_Linop_d),
Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), Tolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
OuterLoopNormMult(100.), guesser(NULL){ }; OuterLoopNormMult(100.), guesser(NULL){ };
void useGuesser(LinearFunction<FieldF> &g){ void useGuesser(LinearFunction<FieldF> &g){
@@ -60,8 +55,9 @@ namespace Grid {
} }
void operator() (const FieldD &src_d_in, FieldD &sol_d){ void operator() (const FieldD &src_d_in, FieldD &sol_d){
TotalInnerIterations = 0; (*this)(src_d_in,sol_d,NULL);
}
void operator() (const FieldD &src_d_in, FieldD &sol_d, RealD *shift){
GridStopWatch TotalTimer; GridStopWatch TotalTimer;
TotalTimer.Start(); TotalTimer.Start();
@@ -81,7 +77,7 @@ namespace Grid {
FieldD src_d(DoublePrecGrid); FieldD src_d(DoublePrecGrid);
src_d = src_d_in; //source for next inner iteration, computed from residual during operation src_d = src_d_in; //source for next inner iteration, computed from residual during operation
RealD inner_tol = InnerTolerance; RealD inner_tol = Tolerance;
FieldF src_f(SinglePrecGrid); FieldF src_f(SinglePrecGrid);
src_f.checkerboard = cb; src_f.checkerboard = cb;
@@ -89,18 +85,17 @@ namespace Grid {
FieldF sol_f(SinglePrecGrid); FieldF sol_f(SinglePrecGrid);
sol_f.checkerboard = cb; sol_f.checkerboard = cb;
ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations); ConjugateGradientShifted<FieldF> CG_f(inner_tol, MaxInnerIterations);
CG_f.ErrorOnNoConverge = false; CG_f.ErrorOnNoConverge = false;
GridStopWatch InnerCGtimer; GridStopWatch InnerCGtimer;
GridStopWatch PrecChangeTimer; GridStopWatch PrecChangeTimer;
Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count for(Integer outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
//Compute double precision rsd and also new RHS vector. //Compute double precision rsd and also new RHS vector.
Linop_d.HermOp(sol_d, tmp_d); Linop_d.HermOp(sol_d, tmp_d);
if(shift) axpy(tmp_d,*shift,sol_d,tmp_d);
RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl; std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl;
@@ -124,9 +119,8 @@ namespace Grid {
//Inner CG //Inner CG
CG_f.Tolerance = inner_tol; CG_f.Tolerance = inner_tol;
InnerCGtimer.Start(); InnerCGtimer.Start();
CG_f(Linop_f, src_f, sol_f); CG_f(Linop_f, src_f, sol_f,shift);
InnerCGtimer.Stop(); InnerCGtimer.Stop();
TotalInnerIterations += CG_f.IterationsToComplete;
//Convert sol back to double and add to double prec solution //Convert sol back to double and add to double prec solution
PrecChangeTimer.Start(); PrecChangeTimer.Start();
@@ -139,13 +133,11 @@ namespace Grid {
//Final trial CG //Final trial CG
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl; std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl;
ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations); ConjugateGradientShifted<FieldD> CG_d(Tolerance, MaxInnerIterations);
CG_d(Linop_d, src_d_in, sol_d); CG_d(Linop_d, src_d_in, sol_d,shift);
TotalFinalStepIterations = CG_d.IterationsToComplete;
TotalTimer.Stop(); TotalTimer.Stop();
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl; std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total time " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
} }
}; };

View File

@@ -45,6 +45,7 @@ public:
Integer MaxIterations; Integer MaxIterations;
int verbose; int verbose;
MultiShiftFunction shifts; MultiShiftFunction shifts;
int iter;
ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) : ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) :
MaxIterations(maxit), MaxIterations(maxit),
@@ -60,6 +61,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, Field &psi)
std::vector<Field> results(nshift,grid); std::vector<Field> results(nshift,grid);
(*this)(Linop,src,results,psi); (*this)(Linop,src,results,psi);
} }
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector<Field> &results, Field &psi) void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector<Field> &results, Field &psi)
{ {
int nshift = shifts.order; int nshift = shifts.order;
@@ -105,11 +107,12 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
RealD a,b,c,d; RealD a,b,c,d;
RealD cp,bp,qq; //prev RealD cp,bp,qq; //prev
int cb=src.checkerboard;
// Matrix mult fields // Matrix mult fields
Field r(grid); Field r(grid);
Field p(grid); Field p(grid); p.checkerboard = src.checkerboard;
Field tmp(grid); Field tmp(grid);
Field mmp(grid); Field mmp(grid);mmp.checkerboard = src.checkerboard;
// Check lightest mass // Check lightest mass
for(int s=0;s<nshift;s++){ for(int s=0;s<nshift;s++){
@@ -132,6 +135,9 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
p=src; p=src;
//MdagM+m[0] //MdagM+m[0]
std::cout << "p.checkerboard " << p.checkerboard
<< "mmp.checkerboard " << mmp.checkerboard << std::endl;
Linop.HermOpAndNorm(p,mmp,d,qq); Linop.HermOpAndNorm(p,mmp,d,qq);
axpy(mmp,mass[0],p,mmp); axpy(mmp,mass[0],p,mmp);
RealD rn = norm2(p); RealD rn = norm2(p);
@@ -269,6 +275,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
RealD cn = norm2(src); RealD cn = norm2(src);
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl; std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
} }
iter = k;
return; return;
} }
} }

View File

@@ -0,0 +1,404 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h
Copyright (C) 2015
Author: Chulwoo Jung <chulwoo@quark.phy.bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END/ LEGAL */
#ifndef GRID_CONJUGATE_GRADIENT_MULTI_MIXED_PREC_H
#define GRID_CONJUGATE_GRADIENT_MULTI_MIXED_PREC_H
namespace Grid {
//Mixed precision restarted defect correction CG
template<class FieldD,class FieldF
//, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0
//, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0
>
class MixedPrecisionConjugateGradientMultiShift : public LinearFunction<FieldD> {
public:
// RealD Tolerance;
Integer MaxInnerIterations;
Integer MaxOuterIterations;
GridBase* SinglePrecGrid; //Grid for single-precision fields
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
LinearOperatorBase<FieldF> &Linop_f;
LinearOperatorBase<FieldD> &Linop_d;
MultiShiftFunction shifts;
Integer iter;
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
// LinearFunction<FieldF> *guesser;
MixedPrecisionConjugateGradientMultiShift(GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d,
Integer maxinnerit, MultiShiftFunction &_shifts ) :
Linop_f(_Linop_f), Linop_d(_Linop_d),
MaxInnerIterations(maxinnerit), SinglePrecGrid(_sp_grid),
OuterLoopNormMult(100.), shifts(_shifts) {};
void operator() (const FieldD &src_d_in, FieldD &sol_d){
assert(0); // not yet implemented
}
void operator() (const FieldD &src_d_in, std::vector<FieldD> &sol_d){
GridStopWatch TotalTimer;
TotalTimer.Start();
int cb = src_d_in.checkerboard;
int nshift = shifts.order;
assert(nshift == sol_d.size());
for(int i=0;i<nshift;i++) sol_d[i].checkerboard = cb;
RealD src_norm = norm2(src_d_in);
// RealD stop = src_norm * Tolerance*Tolerance;
GridBase* DoublePrecGrid = src_d_in._grid;
FieldD tmp_d(DoublePrecGrid); tmp_d.checkerboard = cb;
FieldD tmp2_d(DoublePrecGrid); tmp2_d.checkerboard = cb;
FieldD src_d(DoublePrecGrid);
src_d = src_d_in; //source for next inner iteration, computed from residual during operation
// RealD inner_tol = Tolerance;
FieldD psi_d(DoublePrecGrid);psi_d.checkerboard = cb;
FieldF src_f(SinglePrecGrid);
src_f.checkerboard = cb;
std::vector<FieldF> sol_f(nshift,SinglePrecGrid);
for(int i=0;i<nshift;i++) sol_f[i].checkerboard = cb;
// ConjugateGradientShifted<FieldF> CG_f(inner_tol, MaxInnerIterations);
ConjugateGradientMultiShift<FieldF> MSCG(MaxInnerIterations,shifts);
// CG_f.ErrorOnNoConverge = false;
GridStopWatch InnerCGtimer;
GridStopWatch PrecChangeTimer;
{
// std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl;
// if(norm < OuterLoopNormMult * stop){
// std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration converged on iteration " <<outer_iter <<std::endl;
// break;
// }
// while(norm * inner_tol * inner_tol < stop) inner_tol *= 2; // inner_tol = sqrt(stop/norm) ??
PrecChangeTimer.Start();
precisionChange(src_f, src_d);
PrecChangeTimer.Stop();
// zeroit(sol_f);
//Inner CG
InnerCGtimer.Start();
int if_relup = 0;
#if 0
MSCG(Linop_f,src_f,sol_f);
#else
{
GridBase *grid = SinglePrecGrid;
////////////////////////////////////////////////////////////////////////
// Convenience references to the info stored in "MultiShiftFunction"
////////////////////////////////////////////////////////////////////////
int nshift = shifts.order;
std::vector<RealD> &mass(shifts.poles); // Make references to array in "shifts"
std::vector<RealD> &mresidual(shifts.tolerances);
std::vector<RealD> alpha(nshift,1.);
std::vector<FieldF> ps(nshift,grid);// Search directions
assert(sol_f.size()==nshift);
assert(mass.size()==nshift);
assert(mresidual.size()==nshift);
// dynamic sized arrays on stack; 2d is a pain with vector
RealD bs[nshift];
RealD rsq[nshift];
RealD z[nshift][2];
int converged[nshift];
const int primary =0;
//Primary shift fields CG iteration
RealD a,b,c,d;
RealD cp,bp,qq; //prev
int cb=src_f.checkerboard;
// Matrix mult fields
FieldF r(grid); r.checkerboard = src_f.checkerboard;
FieldF p(grid); p.checkerboard = src_f.checkerboard;
FieldF tmp(grid); tmp.checkerboard = src_f.checkerboard;
FieldF mmp(grid);mmp.checkerboard = src_f.checkerboard;
FieldF psi(grid);psi.checkerboard = src_f.checkerboard;
std::cout.precision(12);
std::cout<<GridLogMessage<<"norm2(psi_d)= "<<norm2(psi_d)<<std::endl;
std::cout<<GridLogMessage<<"norm2(psi)= "<<norm2(psi)<<std::endl;
// Check lightest mass
for(int s=0;s<nshift;s++){
assert( mass[s]>= mass[primary] );
converged[s]=0;
}
// Wire guess to zero
// Residuals "r" are src
// First search direction "p" is also src
cp = norm2(src_f);
Real c_relup = cp;
for(int s=0;s<nshift;s++){
rsq[s] = cp * mresidual[s] * mresidual[s];
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradientMultiShift: shift "<<s
<<" target resid "<<rsq[s]<<std::endl;
ps[s] = src_f;
}
// r and p for primary
r=src_f;
p=src_f;
//MdagM+m[0]
std::cout << "p.checkerboard " << p.checkerboard
<< "mmp.checkerboard " << mmp.checkerboard << std::endl;
Linop_f.HermOpAndNorm(p,mmp,d,qq);
axpy(mmp,mass[0],p,mmp);
RealD rn = norm2(p);
d += rn*mass[0];
// have verified that inner product of
// p and mmp is equal to d after this since
// the d computation is tricky
// qq = real(innerProduct(p,mmp));
// std::cout<<GridLogMessage << "debug equal ? qq "<<qq<<" d "<< d<<std::endl;
b = -cp /d;
// Set up the various shift variables
int iz=0;
z[0][1-iz] = 1.0;
z[0][iz] = 1.0;
bs[0] = b;
for(int s=1;s<nshift;s++){
z[s][1-iz] = 1.0;
z[s][iz] = 1.0/( 1.0 - b*(mass[s]-mass[0]));
bs[s] = b*z[s][iz];
}
// r += b[0] A.p[0]
// c= norm(r)
c=axpy_norm(r,b,mmp,r);
axpby(psi,0.,-bs[0],src_f,src_f);
for(int s=0;s<nshift;s++) {
axpby(sol_f[s],0.,-bs[s]*alpha[s],src_f,src_f);
}
// Iteration loop
int k;
// inefficient zeroing, please replace!
// RealD sol_norm = axpy_norm(sol_d[0],-1.,sol_d[0],sol_d[0]);
zeroit(sol_d[0]);
std::cout<<GridLogMessage<<"norm(sol_d[0])= "<<norm2(sol_d[0])<<std::endl;
int all_converged = 1;
RealD tmp1,tmp2;
for (k=1;k<=MaxOuterIterations;k++){
a = c /cp;
axpy(p,a,p,r);
// Note to self - direction ps is iterated seperately
// for each shift. Does not appear to have any scope
// for avoiding linear algebra in "single" case.
//
// However SAME r is used. Could load "r" and update
// ALL ps[s]. 2/3 Bandwidth saving
// New Kernel: Load r, vector of coeffs, vector of pointers ps
for(int s=0;s<nshift;s++){
if ( ! converged[s] ) {
if (s==0){
axpy(ps[s],a,ps[s],r);
} else{
RealD as =a *z[s][iz]*bs[s] /(z[s][1-iz]*b);
axpby(ps[s],z[s][iz],as,r,ps[s]);
}
}
}
cp=c;
Linop_f.HermOpAndNorm(p,mmp,d,qq);
axpy(mmp,mass[0],p,mmp);
RealD rn = norm2(p);
d += rn*mass[0];
bp=b;
b=-cp/d;
c=axpy_norm(r,b,mmp,r);
// Toggle the recurrence history
bs[0] = b;
iz = 1-iz;
for(int s=1;s<nshift;s++){
if((!converged[s])){
RealD z0 = z[s][1-iz];
RealD z1 = z[s][iz];
z[s][iz] = z0*z1*bp
/ (b*a*(z1-z0) + z1*bp*(1- (mass[s]-mass[0])*b));
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
}
}
axpy(psi,-bs[0],ps[0],psi);
for(int s=0;s<nshift;s++){
int ss = s;
// Scope for optimisation here in case of "single".
// Could load sol_f[0] and pull all ps[s] in.
// if ( single ) ss=primary;
// Bandwith saving in single case is Ls * 3 -> 2+Ls, so ~ 3x saving
// Pipelined CG gain:
//
// New Kernel: Load r, vector of coeffs, vector of pointers ps
// New Kernel: Load sol_f[0], vector of coeffs, vector of pointers ps
// If can predict the coefficient bs then we can fuse these and avoid write reread cyce
// on ps[s].
// Before: 3 x npole + 3 x npole
// After : 2 x npole (ps[s]) => 3x speed up of multishift CG.
if( (!converged[s]) ) {
axpy(sol_f[ss],-bs[s]*alpha[s],ps[s],sol_f[ss]);
}
}
if (k%MaxInnerIterations==0){
// if (c < 1e-4*c_relup){
RealD c_f=c;
precisionChange(tmp_d,psi);
RealD sol_norm =axpy_norm (psi_d,1.,tmp_d,psi_d);
tmp1 = norm2(psi);
zeroit(psi);
tmp2 = norm2(psi);
std::cout<<GridLogMessage<<"k= "<<k<<" norm2(sol)= "<<sol_norm<<" "<<tmp1<<" "<<tmp2<<std::endl;
// precisionChange(sol_d[0],sol_f[0]);
Linop_d.HermOpAndNorm(psi_d,tmp_d,tmp1,tmp2);
axpy(tmp2_d,mass[0],psi_d,tmp_d);
axpy(tmp_d,-1.,tmp2_d,src_d);
precisionChange(r,tmp_d);
c_relup = norm2(r);
std::cout<<GridLogMessage<<"k= "<<k<<" norm2(r)= "<<c<<" "<<c_relup<<" "<<c_f<<std::endl;
if_relup=1;
}
// Convergence checks
all_converged=1;
for(int s=0;s<nshift;s++){
if ( (!converged[s]) ){
RealD css = c * z[s][iz]* z[s][iz];
if(css<rsq[s]){
if ( ! converged[s] )
std::cout<<GridLogMessage<<"ConjugateGradientMultiShift k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
converged[s]=1;
} else {
if (k%MaxInnerIterations==0)
std::cout<<GridLogMessage<<"ConjugateGradientMultiShift k="<<k<<" Shift "<<s<<" has not converged "<<css<<"<"<<rsq[s]<<std::endl;
all_converged=0;
}
}
}
#if 0
if ( all_converged ){
std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
#else
if ( converged[0] ){
std::cout<<GridLogMessage<< "CGMultiShift: Shift 0 have converged iteration, terminating "<<k<<std::endl;
#endif
#if 1
for(int s=1; s < nshift; s++) {
Linop_f.HermOpAndNorm(sol_f[s],mmp,d,qq);
axpy(tmp,mass[s],sol_f[s],mmp);
axpy(r,-alpha[s],src_f,tmp);
RealD rn = norm2(r);
RealD cn = norm2(src_f);
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
}
#endif
iter = k;
break;
}
}
// ugly hack
if ( !all_converged )
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
// assert(0);
}
#endif
InnerCGtimer.Stop();
//Convert sol back to double and add to double prec solution
PrecChangeTimer.Start();
sol_d[0]=psi_d;
for(int i=1;i<nshift;i++)precisionChange(sol_d[i], sol_f[i]);
std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl;
// Check answers
for(int s=0; s < nshift; s++) {
RealD tmp1,tmp2;
Linop_d.HermOpAndNorm(sol_d[s],tmp_d,tmp1,tmp2);
axpy(tmp2_d,shifts.poles[s],sol_d[s],tmp_d);
axpy(tmp_d,-1.,src_d,tmp2_d);
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(norm2(tmp_d)/norm2(src_d))<<std::endl;
}
PrecChangeTimer.Stop();
}
//Final trial CG
// std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl;
TotalTimer.Stop();
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
}
};
}
#endif

View File

@@ -0,0 +1,168 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ConjugateGradient.h
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_CONJUGATE_GRADIENT_SHIFTED_H
#define GRID_CONJUGATE_GRADIENT_SHIFTED_H
namespace Grid {
/////////////////////////////////////////////////////////////
// Base classes for iterative processes based on operators
// single input vec, single output vec.
/////////////////////////////////////////////////////////////
template<class Field>
class ConjugateGradientShifted : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; //throw an assert when the CG fails to converge. Defaults true.
RealD Tolerance;
Integer MaxIterations;
ConjugateGradientShifted(RealD tol,Integer maxit, bool err_on_no_conv = true) : Tolerance(tol), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv) {
};
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi ){
(*this)(Linop,src,psi,NULL);
}
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi, RealD *shift){
psi.checkerboard = src.checkerboard;
conformable(psi,src);
RealD cp,c,a,d,b,ssq,qq,b_pred;
Field p(src);
Field mmp(src);
Field r(src);
//Initial residual computation & set up
RealD guess = norm2(psi);
assert(std::isnan(guess)==0);
Linop.HermOpAndNorm(psi,mmp,d,b);
if(shift) axpy(mmp,*shift,psi,mmp);
RealD rn = norm2(psi);
if(shift) d += rn*(*shift);
RealD d2 = real(innerProduct(psi,mmp));
b= norm2(mmp);
RealD src_norm=norm2(src);
r= src-mmp;
p= r;
a =norm2(p);
cp =a;
ssq=norm2(src);
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: src "<<ssq <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mp "<<d <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mmp "<<b <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: cp,r "<<cp <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: p "<<a <<std::endl;
RealD rsq = Tolerance* Tolerance*ssq;
//Check if guess is really REALLY good :)
if ( cp <= rsq ) {
return;
}
std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" target "<<rsq<<std::endl;
GridStopWatch LinalgTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k=1;k<=MaxIterations;k++){
c=cp;
MatrixTimer.Start();
Linop.HermOpAndNorm(p,mmp,d,qq);
MatrixTimer.Stop();
LinalgTimer.Start();
if(shift) axpy(mmp,*shift,p,mmp);
RealD rn = norm2(p);
if(shift) d += rn*(*shift);
RealD d2 = real(innerProduct(p,mmp));
qq = norm2(mmp);
if (k%10==1) std::cout<< std::setprecision(4)<< "d: "<<d<<" d2= "<<d2<<std::endl;
// RealD qqck = norm2(mmp);
// ComplexD dck = innerProduct(p,mmp);
a = c/d;
b_pred = a*(a*qq-d)/c;
cp = axpy_norm(r,-a,mmp,r);
b = cp/c;
if (k%10==1) std::cout<< std::setprecision(4)<<"k= "<<k<<" src: "<<src_norm<<" r= "<<cp<<std::endl;
// Fuse these loops ; should be really easy
psi= a*p+psi;
p = p*b+r;
LinalgTimer.Stop();
std::cout<<GridLogIterative<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target "<< rsq<<std::endl;
// Stopping condition
if ( cp <= rsq ) {
SolverTimer.Stop();
Linop.HermOpAndNorm(psi,mmp,d,qq);
if(shift) mmp = mmp + (*shift) * psi;
p=mmp-src;
RealD mmpnorm = sqrt(norm2(mmp));
RealD psinorm = sqrt(norm2(psi));
RealD srcnorm = sqrt(norm2(src));
RealD resnorm = sqrt(norm2(p));
RealD true_residual = resnorm/srcnorm;
std::cout<<GridLogMessage<<"ConjugateGradient: Converged on iteration " <<k
<<" computed residual "<<sqrt(cp/ssq)
<<" true residual " <<true_residual
<<" target "<<Tolerance<<std::endl;
std::cout<<GridLogMessage<<"Time elapsed: Total "<< SolverTimer.Elapsed() << " Matrix "<<MatrixTimer.Elapsed() << " Linalg "<<LinalgTimer.Elapsed();
std::cout<<std::endl;
if(ErrorOnNoConverge)
assert(true_residual/Tolerance < 1000.0);
return;
}
}
std::cout<<GridLogMessage<<"ConjugateGradient did NOT converge"<<std::endl;
// assert(0);
}
};
}
#endif

View File

@@ -31,11 +31,16 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <string.h> //memset #include <string.h> //memset
#ifdef USE_LAPACK #ifdef USE_LAPACK
#ifdef USE_MKL
#include<mkl_lapack.h>
#else
void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
double *vl, double *vu, int *il, int *iu, double *abstol, double *vl, double *vu, int *il, int *iu, double *abstol,
int *m, double *w, double *z, int *ldz, int *isuppz, int *m, double *w, double *z, int *ldz, int *isuppz,
double *work, int *lwork, int *iwork, int *liwork, double *work, int *lwork, int *iwork, int *liwork,
int *info); int *info);
//#include <lapacke/lapacke.h>
#endif
#endif #endif
#include "DenseMatrix.h" #include "DenseMatrix.h"
#include "EigenSort.h" #include "EigenSort.h"
@@ -62,12 +67,13 @@ public:
int Np; // Np -- Number of spare vecs in kryloc space int Np; // Np -- Number of spare vecs in kryloc space
int Nm; // Nm -- total number of vectors int Nm; // Nm -- total number of vectors
RealD OrthoTime;
RealD eresid; RealD eresid;
SortEigen<Field> _sort; SortEigen<Field> _sort;
// GridCartesian &_fgrid;
LinearOperatorBase<Field> &_Linop; LinearOperatorBase<Field> &_Linop;
OperatorFunction<Field> &_poly; OperatorFunction<Field> &_poly;
@@ -124,23 +130,23 @@ public:
GridBase *grid = evec[0]._grid; GridBase *grid = evec[0]._grid;
Field w(grid); Field w(grid);
std::cout << "RitzMatrix "<<std::endl; std::cout<<GridLogMessage << "RitzMatrix "<<std::endl;
for(int i=0;i<k;i++){ for(int i=0;i<k;i++){
_poly(_Linop,evec[i],w); _poly(_Linop,evec[i],w);
std::cout << "["<<i<<"] "; std::cout<<GridLogMessage << "["<<i<<"] ";
for(int j=0;j<k;j++){ for(int j=0;j<k;j++){
ComplexD in = innerProduct(evec[j],w); ComplexD in = innerProduct(evec[j],w);
if ( fabs((double)i-j)>1 ) { if ( fabs((double)i-j)>1 ) {
if (abs(in) >1.0e-9 ) { if (abs(in) >1.0e-9 ) {
std::cout<<"oops"<<std::endl; std::cout<<GridLogMessage<<"oops"<<std::endl;
abort(); abort();
} else } else
std::cout << " 0 "; std::cout<<GridLogMessage << " 0 ";
} else { } else {
std::cout << " "<<in<<" "; std::cout<<GridLogMessage << " "<<in<<" ";
} }
} }
std::cout << std::endl; std::cout<<GridLogMessage << std::endl;
} }
} }
@@ -174,10 +180,10 @@ public:
RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
// 7. vk+1 := wk/βk+1 // 7. vk+1 := wk/βk+1
// std::cout << "alpha = " << zalph << " beta "<<beta<<std::endl; std::cout<<GridLogMessage << "alpha = " << zalph << " beta "<<beta<<std::endl;
const RealD tiny = 1.0e-20; const RealD tiny = 1.0e-20;
if ( beta < tiny ) { if ( beta < tiny ) {
std::cout << " beta is tiny "<<beta<<std::endl; std::cout<<GridLogMessage << " beta is tiny "<<beta<<std::endl;
} }
lmd[k] = alph; lmd[k] = alph;
lme[k] = beta; lme[k] = beta;
@@ -253,6 +259,7 @@ public:
} }
#ifdef USE_LAPACK #ifdef USE_LAPACK
#define LAPACK_INT long long
void diagonalize_lapack(DenseVector<RealD>& lmd, void diagonalize_lapack(DenseVector<RealD>& lmd,
DenseVector<RealD>& lme, DenseVector<RealD>& lme,
int N1, int N1,
@@ -262,7 +269,7 @@ public:
const int size = Nm; const int size = Nm;
// tevals.resize(size); // tevals.resize(size);
// tevecs.resize(size); // tevecs.resize(size);
int NN = N1; LAPACK_INT NN = N1;
double evals_tmp[NN]; double evals_tmp[NN];
double evec_tmp[NN][NN]; double evec_tmp[NN][NN];
memset(evec_tmp[0],0,sizeof(double)*NN*NN); memset(evec_tmp[0],0,sizeof(double)*NN*NN);
@@ -276,19 +283,19 @@ public:
if (i==j) evals_tmp[i] = lmd[i]; if (i==j) evals_tmp[i] = lmd[i];
if (j==(i-1)) EE[j] = lme[j]; if (j==(i-1)) EE[j] = lme[j];
} }
int evals_found; LAPACK_INT evals_found;
int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; LAPACK_INT lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ;
int liwork = 3+NN*10 ; LAPACK_INT liwork = 3+NN*10 ;
int iwork[liwork]; LAPACK_INT iwork[liwork];
double work[lwork]; double work[lwork];
int isuppz[2*NN]; LAPACK_INT isuppz[2*NN];
char jobz = 'V'; // calculate evals & evecs char jobz = 'V'; // calculate evals & evecs
char range = 'I'; // calculate all evals char range = 'I'; // calculate all evals
// char range = 'A'; // calculate all evals // char range = 'A'; // calculate all evals
char uplo = 'U'; // refer to upper half of original matrix char uplo = 'U'; // refer to upper half of original matrix
char compz = 'I'; // Compute eigenvectors of tridiagonal matrix char compz = 'I'; // Compute eigenvectors of tridiagonal matrix
int ifail[NN]; int ifail[NN];
int info; long long info;
// int total = QMP_get_number_of_nodes(); // int total = QMP_get_number_of_nodes();
// int node = QMP_get_node_number(); // int node = QMP_get_node_number();
// GridBase *grid = evec[0]._grid; // GridBase *grid = evec[0]._grid;
@@ -296,14 +303,18 @@ public:
int node = grid->_processor; int node = grid->_processor;
int interval = (NN/total)+1; int interval = (NN/total)+1;
double vl = 0.0, vu = 0.0; double vl = 0.0, vu = 0.0;
int il = interval*node+1 , iu = interval*(node+1); LAPACK_INT il = interval*node+1 , iu = interval*(node+1);
if (iu > NN) iu=NN; if (iu > NN) iu=NN;
double tol = 0.0; double tol = 0.0;
if (1) { if (1) {
memset(evals_tmp,0,sizeof(double)*NN); memset(evals_tmp,0,sizeof(double)*NN);
if ( il <= NN){ if ( il <= NN){
printf("total=%d node=%d il=%d iu=%d\n",total,node,il,iu); printf("total=%d node=%d il=%d iu=%d\n",total,node,il,iu);
#ifdef USE_MKL
dstegr(&jobz, &range, &NN,
#else
LAPACK_dstegr(&jobz, &range, &NN, LAPACK_dstegr(&jobz, &range, &NN,
#endif
(double*)DD, (double*)EE, (double*)DD, (double*)EE,
&vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A'
&tol, // tolerance &tol, // tolerance
@@ -335,6 +346,7 @@ public:
lmd [NN-1-i]=evals_tmp[i]; lmd [NN-1-i]=evals_tmp[i];
} }
} }
#undef LAPACK_INT
#endif #endif
@@ -365,12 +377,14 @@ public:
// diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid); // diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid);
#endif #endif
int Niter = 100*N1; int Niter = 10000*N1;
int kmin = 1; int kmin = 1;
int kmax = N2; int kmax = N2;
// (this should be more sophisticated) // (this should be more sophisticated)
for(int iter=0; iter<Niter; ++iter){ for(int iter=0; ; ++iter){
if ( (iter+1)%(100*N1)==0)
std::cout<<GridLogMessage << "[QL method] Not converged - iteration "<<iter+1<<"\n";
// determination of 2x2 leading submatrix // determination of 2x2 leading submatrix
RealD dsub = lmd[kmax-1]-lmd[kmax-2]; RealD dsub = lmd[kmax-1]-lmd[kmax-2];
@@ -399,11 +413,11 @@ public:
_sort.push(lmd3,N2); _sort.push(lmd3,N2);
_sort.push(lmd2,N2); _sort.push(lmd2,N2);
for(int k=0; k<N2; ++k){ for(int k=0; k<N2; ++k){
if (fabs(lmd2[k] - lmd3[k]) >SMALL) std::cout <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl; if (fabs(lmd2[k] - lmd3[k]) >SMALL) std::cout<<GridLogMessage <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl;
// if (fabs(lme2[k] - lme[k]) >SMALL) std::cout <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl; // if (fabs(lme2[k] - lme[k]) >SMALL) std::cout<<GridLogMessage <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl;
} }
for(int k=0; k<N1*N1; ++k){ for(int k=0; k<N1*N1; ++k){
// if (fabs(Qt2[k] - Qt[k]) >SMALL) std::cout <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl; // if (fabs(Qt2[k] - Qt[k]) >SMALL) std::cout<<GridLogMessage <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl;
} }
} }
#endif #endif
@@ -418,7 +432,7 @@ public:
} }
} }
} }
std::cout << "[QL method] Error - Too many iteration: "<<Niter<<"\n"; std::cout<<GridLogMessage << "[QL method] Error - Too many iteration: "<<Niter<<"\n";
abort(); abort();
} }
@@ -435,6 +449,7 @@ public:
DenseVector<Field>& evec, DenseVector<Field>& evec,
int k) int k)
{ {
double t0=-usecond()/1e6;
typedef typename Field::scalar_type MyComplex; typedef typename Field::scalar_type MyComplex;
MyComplex ip; MyComplex ip;
@@ -453,6 +468,8 @@ public:
w = w - ip * evec[j]; w = w - ip * evec[j];
} }
normalise(w); normalise(w);
t0+=usecond()/1e6;
OrthoTime +=t0;
} }
void setUnit_Qt(int Nm, DenseVector<RealD> &Qt) { void setUnit_Qt(int Nm, DenseVector<RealD> &Qt) {
@@ -486,10 +503,10 @@ until convergence
GridBase *grid = evec[0]._grid; GridBase *grid = evec[0]._grid;
assert(grid == src._grid); assert(grid == src._grid);
std::cout << " -- Nk = " << Nk << " Np = "<< Np << std::endl; std::cout<<GridLogMessage << " -- Nk = " << Nk << " Np = "<< Np << std::endl;
std::cout << " -- Nm = " << Nm << std::endl; std::cout<<GridLogMessage << " -- Nm = " << Nm << std::endl;
std::cout << " -- size of eval = " << eval.size() << std::endl; std::cout<<GridLogMessage << " -- size of eval = " << eval.size() << std::endl;
std::cout << " -- size of evec = " << evec.size() << std::endl; std::cout<<GridLogMessage << " -- size of evec = " << evec.size() << std::endl;
assert(Nm == evec.size() && Nm == eval.size()); assert(Nm == evec.size() && Nm == eval.size());
@@ -500,6 +517,7 @@ until convergence
DenseVector<int> Iconv(Nm); DenseVector<int> Iconv(Nm);
DenseVector<Field> B(Nm,grid); // waste of space replicating DenseVector<Field> B(Nm,grid); // waste of space replicating
// DenseVector<Field> Btemp(Nm,grid); // waste of space replicating
Field f(grid); Field f(grid);
Field v(grid); Field v(grid);
@@ -515,35 +533,48 @@ until convergence
// (uniform vector) Why not src?? // (uniform vector) Why not src??
// evec[0] = 1.0; // evec[0] = 1.0;
evec[0] = src; evec[0] = src;
std:: cout <<"norm2(src)= " << norm2(src)<<std::endl; std:: cout<<GridLogMessage <<"norm2(src)= " << norm2(src)<<std::endl;
// << src._grid << std::endl; // << src._grid << std::endl;
normalise(evec[0]); normalise(evec[0]);
std:: cout <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl; std:: cout<<GridLogMessage <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl;
// << evec[0]._grid << std::endl; // << evec[0]._grid << std::endl;
// Initial Nk steps // Initial Nk steps
OrthoTime=0.;
double t0=usecond()/1e6;
for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k); for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
// std:: cout <<"norm2(evec[1])= " << norm2(evec[1]) << std::endl; double t1=usecond()/1e6;
// std:: cout <<"norm2(evec[2])= " << norm2(evec[2]) << std::endl; std::cout<<GridLogMessage <<"IRL::Initial steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
// std:: cout<<GridLogMessage <<"norm2(evec[1])= " << norm2(evec[1]) << std::endl;
// std:: cout<<GridLogMessage <<"norm2(evec[2])= " << norm2(evec[2]) << std::endl;
RitzMatrix(evec,Nk); RitzMatrix(evec,Nk);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::RitzMatrix: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
for(int k=0; k<Nk; ++k){ for(int k=0; k<Nk; ++k){
// std:: cout <<"eval " << k << " " <<eval[k] << std::endl; // std:: cout<<GridLogMessage <<"eval " << k << " " <<eval[k] << std::endl;
// std:: cout <<"lme " << k << " " << lme[k] << std::endl; // std:: cout<<GridLogMessage <<"lme " << k << " " << lme[k] << std::endl;
} }
// Restarting loop begins // Restarting loop begins
for(int iter = 0; iter<Niter; ++iter){ for(int iter = 0; iter<Niter; ++iter){
std::cout<<"\n Restart iteration = "<< iter << std::endl; std::cout<<GridLogMessage<<"\n Restart iteration = "<< iter << std::endl;
// //
// Rudy does a sort first which looks very different. Getting fed up with sorting out the algo defs. // Rudy does a sort first which looks very different. Getting fed up with sorting out the algo defs.
// We loop over // We loop over
// //
OrthoTime=0.;
for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k); for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL:: "<<Np <<" steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
f *= lme[Nm-1]; f *= lme[Nm-1];
RitzMatrix(evec,k2); RitzMatrix(evec,k2);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL:: RitzMatrix: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
// getting eigenvalues // getting eigenvalues
for(int k=0; k<Nm; ++k){ for(int k=0; k<Nm; ++k){
@@ -552,18 +583,27 @@ until convergence
} }
setUnit_Qt(Nm,Qt); setUnit_Qt(Nm,Qt);
diagonalize(eval2,lme2,Nm,Nm,Qt,grid); diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL:: diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
// sorting // sorting
_sort.push(eval2,Nm); _sort.push(eval2,Nm);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL:: eval sorting: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
// Implicitly shifted QR transformations // Implicitly shifted QR transformations
setUnit_Qt(Nm,Qt); setUnit_Qt(Nm,Qt);
for(int ip=0; ip<k2; ++ip){
std::cout<<GridLogMessage << "eval "<< ip << " "<< eval2[ip] << std::endl;
}
for(int ip=k2; ip<Nm; ++ip){ for(int ip=k2; ip<Nm; ++ip){
std::cout << "qr_decomp "<< ip << " "<< eval2[ip] << std::endl; std::cout<<GridLogMessage << "qr_decomp "<< ip << " "<< eval2[ip] << std::endl;
qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm); qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
} }
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::qr_decomp: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
if (0) {
for(int i=0; i<(Nk+1); ++i) B[i] = 0.0; for(int i=0; i<(Nk+1); ++i) B[i] = 0.0;
for(int j=k1-1; j<k2+1; ++j){ for(int j=k1-1; j<k2+1; ++j){
@@ -571,6 +611,30 @@ until convergence
B[j].checkerboard = evec[k].checkerboard; B[j].checkerboard = evec[k].checkerboard;
B[j] += Qt[k+Nm*j] * evec[k]; B[j] += Qt[k+Nm*j] * evec[k];
} }
}
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::QR Rotate: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
}
if (1) {
for(int i=0; i<(Nk+1); ++i) {
B[i] = 0.0;
B[i].checkerboard = evec[0].checkerboard;
}
int j_block = 24; int k_block=24;
PARALLEL_FOR_LOOP
for(int ss=0;ss < grid->oSites();ss++){
for(int jj=k1-1; jj<k2+1; jj += j_block)
for(int kk=0; kk<Nm; kk += k_block)
for(int j=jj; (j<(k2+1)) && j<(jj+j_block); ++j){
for(int k=kk; (k<Nm) && k<(kk+k_block) ; ++k){
B[j]._odata[ss] +=Qt[k+Nm*j] * evec[k]._odata[ss];
}
}
}
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::QR rotation: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
} }
for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j]; for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j];
@@ -579,7 +643,7 @@ until convergence
f += lme[k2-1] * evec[k2]; f += lme[k2-1] * evec[k2];
beta_k = norm2(f); beta_k = norm2(f);
beta_k = sqrt(beta_k); beta_k = sqrt(beta_k);
std::cout<<" beta(k) = "<<beta_k<<std::endl; std::cout<<GridLogMessage<<" beta(k) = "<<beta_k<<std::endl;
RealD betar = 1.0/beta_k; RealD betar = 1.0/beta_k;
evec[k2] = betar * f; evec[k2] = betar * f;
@@ -592,7 +656,10 @@ until convergence
} }
setUnit_Qt(Nm,Qt); setUnit_Qt(Nm,Qt);
diagonalize(eval2,lme2,Nk,Nm,Qt,grid); diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
if (0) {
for(int k = 0; k<Nk; ++k) B[k]=0.0; for(int k = 0; k<Nk; ++k) B[k]=0.0;
for(int j = 0; j<Nk; ++j){ for(int j = 0; j<Nk; ++j){
@@ -600,12 +667,34 @@ until convergence
B[j].checkerboard = evec[k].checkerboard; B[j].checkerboard = evec[k].checkerboard;
B[j] += Qt[k+j*Nm] * evec[k]; B[j] += Qt[k+j*Nm] * evec[k];
} }
// std::cout << "norm(B["<<j<<"])="<<norm2(B[j])<<std::endl; std::cout<<GridLogMessage << "norm(B["<<j<<"])="<<norm2(B[j])<<std::endl;
}
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::Convergence rotation: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
}
if (1) {
for(int i=0; i<(Nk+1); ++i) {
B[i] = 0.0;
B[i].checkerboard = evec[0].checkerboard;
}
int j_block = 24; int k_block=24;
PARALLEL_FOR_LOOP
for(int ss=0;ss < grid->oSites();ss++){
for(int jj=0; jj<Nk; jj += j_block)
for(int kk=0; kk<Nk; kk += k_block)
for(int j=jj; (j<Nk) && j<(jj+j_block); ++j){
for(int k=kk; (k<Nk) && k<(kk+k_block) ; ++k){
B[j]._odata[ss] +=Qt[k+Nm*j] * evec[k]._odata[ss];
}
}
}
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::convergence rotation : "<<t1-t0<< "seconds"<<std::endl; t0=t1;
} }
// _sort.push(eval2,B,Nk);
Nconv = 0; Nconv = 0;
// std::cout << std::setiosflags(std::ios_base::scientific); // std::cout<<GridLogMessage << std::setiosflags(std::ios_base::scientific);
for(int i=0; i<Nk; ++i){ for(int i=0; i<Nk; ++i){
// _poly(_Linop,B[i],v); // _poly(_Linop,B[i],v);
@@ -613,14 +702,16 @@ until convergence
RealD vnum = real(innerProduct(B[i],v)); // HermOp. RealD vnum = real(innerProduct(B[i],v)); // HermOp.
RealD vden = norm2(B[i]); RealD vden = norm2(B[i]);
RealD vv0 = norm2(v);
eval2[i] = vnum/vden; eval2[i] = vnum/vden;
v -= eval2[i]*B[i]; v -= eval2[i]*B[i];
RealD vv = norm2(v); RealD vv = norm2(v);
std::cout.precision(13); std::cout.precision(13);
std::cout << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] "; std::cout<<GridLogMessage << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
std::cout<<"eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i]; std::cout<<"eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i];
std::cout <<" |H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv<< std::endl; std::cout<<"|H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv;
std::cout<<" "<< vnum/(sqrt(vden)*sqrt(vv0)) << std::endl;
// change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged // change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged
if((vv<eresid*eresid) && (i == Nconv) ){ if((vv<eresid*eresid) && (i == Nconv) ){
@@ -629,17 +720,19 @@ until convergence
} }
} // i-loop end } // i-loop end
// std::cout << std::resetiosflags(std::ios_base::scientific); // std::cout<<GridLogMessage << std::resetiosflags(std::ios_base::scientific);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::convergence testing: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
std::cout<<" #modes converged: "<<Nconv<<std::endl; std::cout<<GridLogMessage<<" #modes converged: "<<Nconv<<std::endl;
if( Nconv>=Nstop ){ if( Nconv>=Nstop ){
goto converged; goto converged;
} }
} // end of iter loop } // end of iter loop
std::cout<<"\n NOT converged.\n"; std::cout<<GridLogMessage<<"\n NOT converged.\n";
abort(); abort();
converged: converged:
@@ -652,10 +745,10 @@ until convergence
} }
_sort.push(eval,evec,Nconv); _sort.push(eval,evec,Nconv);
std::cout << "\n Converged\n Summary :\n"; std::cout<<GridLogMessage << "\n Converged\n Summary :\n";
std::cout << " -- Iterations = "<< Nconv << "\n"; std::cout<<GridLogMessage << " -- Iterations = "<< Nconv << "\n";
std::cout << " -- beta(k) = "<< beta_k << "\n"; std::cout<<GridLogMessage << " -- beta(k) = "<< beta_k << "\n";
std::cout << " -- Nconv = "<< Nconv << "\n"; std::cout<<GridLogMessage << " -- Nconv = "<< Nconv << "\n";
} }
///////////////////////////////////////////////// /////////////////////////////////////////////////
@@ -678,25 +771,25 @@ until convergence
} }
} }
std::cout<<"Lanczos_Factor start/end " <<start <<"/"<<end<<std::endl; std::cout<<GridLogMessage<<"Lanczos_Factor start/end " <<start <<"/"<<end<<std::endl;
// Starting from scratch, bq[0] contains a random vector and |bq[0]| = 1 // Starting from scratch, bq[0] contains a random vector and |bq[0]| = 1
int first; int first;
if(start == 0){ if(start == 0){
std::cout << "start == 0\n"; //TESTING std::cout<<GridLogMessage << "start == 0\n"; //TESTING
_poly(_Linop,bq[0],bf); _poly(_Linop,bq[0],bf);
alpha = real(innerProduct(bq[0],bf));//alpha = bq[0]^dag A bq[0] alpha = real(innerProduct(bq[0],bf));//alpha = bq[0]^dag A bq[0]
std::cout << "alpha = " << alpha << std::endl; std::cout<<GridLogMessage << "alpha = " << alpha << std::endl;
bf = bf - alpha * bq[0]; //bf = A bq[0] - alpha bq[0] bf = bf - alpha * bq[0]; //bf = A bq[0] - alpha bq[0]
H[0][0]=alpha; H[0][0]=alpha;
std::cout << "Set H(0,0) to " << H[0][0] << std::endl; std::cout<<GridLogMessage << "Set H(0,0) to " << H[0][0] << std::endl;
first = 1; first = 1;
@@ -716,19 +809,19 @@ until convergence
beta = 0;sqbt = 0; beta = 0;sqbt = 0;
std::cout << "cont is true so setting beta to zero\n"; std::cout<<GridLogMessage << "cont is true so setting beta to zero\n";
} else { } else {
beta = norm2(bf); beta = norm2(bf);
sqbt = sqrt(beta); sqbt = sqrt(beta);
std::cout << "beta = " << beta << std::endl; std::cout<<GridLogMessage << "beta = " << beta << std::endl;
} }
for(int j=first;j<end;j++){ for(int j=first;j<end;j++){
std::cout << "Factor j " << j <<std::endl; std::cout<<GridLogMessage << "Factor j " << j <<std::endl;
if(cont){ // switches to factoring; understand start!=0 and initial bf value is right. if(cont){ // switches to factoring; understand start!=0 and initial bf value is right.
bq[j] = bf; cont = false; bq[j] = bf; cont = false;
@@ -751,7 +844,7 @@ until convergence
beta = fnorm; beta = fnorm;
sqbt = sqrt(beta); sqbt = sqrt(beta);
std::cout << "alpha = " << alpha << " fnorm = " << fnorm << '\n'; std::cout<<GridLogMessage << "alpha = " << alpha << " fnorm = " << fnorm << '\n';
///Iterative refinement of orthogonality V = [ bq[0] bq[1] ... bq[M] ] ///Iterative refinement of orthogonality V = [ bq[0] bq[1] ... bq[M] ]
int re = 0; int re = 0;
@@ -786,8 +879,8 @@ until convergence
bck = sqrt( nmbex ); bck = sqrt( nmbex );
re++; re++;
} }
std::cout << "Iteratively refined orthogonality, changes alpha\n"; std::cout<<GridLogMessage << "Iteratively refined orthogonality, changes alpha\n";
if(re > 1) std::cout << "orthagonality refined " << re << " times" <<std::endl; if(re > 1) std::cout<<GridLogMessage << "orthagonality refined " << re << " times" <<std::endl;
H[j][j]=alpha; H[j][j]=alpha;
} }
@@ -802,11 +895,13 @@ until convergence
void ImplicitRestart(int TM, DenseVector<RealD> &evals, DenseVector<DenseVector<RealD> > &evecs, DenseVector<Field> &bq, Field &bf, int cont) void ImplicitRestart(int TM, DenseVector<RealD> &evals, DenseVector<DenseVector<RealD> > &evecs, DenseVector<Field> &bq, Field &bf, int cont)
{ {
std::cout << "ImplicitRestart begin. Eigensort starting\n"; std::cout<<GridLogMessage << "ImplicitRestart begin. Eigensort starting\n";
DenseMatrix<RealD> H; Resize(H,Nm,Nm); DenseMatrix<RealD> H; Resize(H,Nm,Nm);
#ifndef USE_LAPACK
EigenSort(evals, evecs); EigenSort(evals, evecs);
#endif
///Assign shifts ///Assign shifts
int K=Nk; int K=Nk;
@@ -829,15 +924,15 @@ until convergence
/// Shifted H defines a new K step Arnoldi factorization /// Shifted H defines a new K step Arnoldi factorization
RealD beta = H[ff][ff-1]; RealD beta = H[ff][ff-1];
RealD sig = Q[TM - 1][ff - 1]; RealD sig = Q[TM - 1][ff - 1];
std::cout << "beta = " << beta << " sig = " << real(sig) <<std::endl; std::cout<<GridLogMessage << "beta = " << beta << " sig = " << real(sig) <<std::endl;
std::cout << "TM = " << TM << " "; std::cout<<GridLogMessage << "TM = " << TM << " ";
std::cout << norm2(bq[0]) << " -- before" <<std::endl; std::cout<<GridLogMessage << norm2(bq[0]) << " -- before" <<std::endl;
/// q -> q Q /// q -> q Q
times_real(bq, Q, TM); times_real(bq, Q, TM);
std::cout << norm2(bq[0]) << " -- after " << ff <<std::endl; std::cout<<GridLogMessage << norm2(bq[0]) << " -- after " << ff <<std::endl;
bf = beta* bq[ff] + sig* bf; bf = beta* bq[ff] + sig* bf;
/// Do the rest of the factorization /// Do the rest of the factorization
@@ -861,7 +956,7 @@ until convergence
int ff = Lanczos_Factor(0, M, cont, bq,bf,H); // 0--M to begin with int ff = Lanczos_Factor(0, M, cont, bq,bf,H); // 0--M to begin with
if(ff < M) { if(ff < M) {
std::cout << "Krylov: aborting ff "<<ff <<" "<<M<<std::endl; std::cout<<GridLogMessage << "Krylov: aborting ff "<<ff <<" "<<M<<std::endl;
abort(); // Why would this happen? abort(); // Why would this happen?
} }
@@ -870,7 +965,7 @@ until convergence
for(int it = 0; it < Niter && (converged < Nk); ++it) { for(int it = 0; it < Niter && (converged < Nk); ++it) {
std::cout << "Krylov: Iteration --> " << it << std::endl; std::cout<<GridLogMessage << "Krylov: Iteration --> " << it << std::endl;
int lock_num = lock ? converged : 0; int lock_num = lock ? converged : 0;
DenseVector<RealD> tevals(M - lock_num ); DenseVector<RealD> tevals(M - lock_num );
DenseMatrix<RealD> tevecs; Resize(tevecs,M - lock_num,M - lock_num); DenseMatrix<RealD> tevecs; Resize(tevecs,M - lock_num,M - lock_num);
@@ -886,7 +981,7 @@ until convergence
Wilkinson<RealD>(H, evals, evecs, small); Wilkinson<RealD>(H, evals, evecs, small);
// Check(); // Check();
std::cout << "Done "<<std::endl; std::cout<<GridLogMessage << "Done "<<std::endl;
} }
@@ -951,7 +1046,7 @@ until convergence
DenseVector<RealD> &tevals, DenseVector<DenseVector<RealD> > &tevecs, DenseVector<RealD> &tevals, DenseVector<DenseVector<RealD> > &tevecs,
int lock, int converged) int lock, int converged)
{ {
std::cout << "Converged " << converged << " so far." << std::endl; std::cout<<GridLogMessage << "Converged " << converged << " so far." << std::endl;
int lock_num = lock ? converged : 0; int lock_num = lock ? converged : 0;
int M = Nm; int M = Nm;
@@ -966,7 +1061,9 @@ until convergence
RealD small=1.0e-16; RealD small=1.0e-16;
Wilkinson<RealD>(AH, tevals, tevecs, small); Wilkinson<RealD>(AH, tevals, tevecs, small);
#ifndef USE_LAPACK
EigenSort(tevals, tevecs); EigenSort(tevals, tevecs);
#endif
RealD resid_nrm= norm2(bf); RealD resid_nrm= norm2(bf);
@@ -977,7 +1074,7 @@ until convergence
RealD diff = 0; RealD diff = 0;
diff = abs( tevecs[i][Nm - 1 - lock_num] ) * resid_nrm; diff = abs( tevecs[i][Nm - 1 - lock_num] ) * resid_nrm;
std::cout << "residual estimate " << SS-1-i << " " << diff << " of (" << tevals[i] << ")" << std::endl; std::cout<<GridLogMessage << "residual estimate " << SS-1-i << " " << diff << " of (" << tevals[i] << ")" << std::endl;
if(diff < converged) { if(diff < converged) {
@@ -993,13 +1090,13 @@ until convergence
lock_num++; lock_num++;
} }
converged++; converged++;
std::cout << " converged on eval " << converged << " of " << Nk << std::endl; std::cout<<GridLogMessage << " converged on eval " << converged << " of " << Nk << std::endl;
} else { } else {
break; break;
} }
} }
#endif #endif
std::cout << "Got " << converged << " so far " <<std::endl; std::cout<<GridLogMessage << "Got " << converged << " so far " <<std::endl;
} }
///Check ///Check
@@ -1008,7 +1105,9 @@ until convergence
DenseVector<RealD> goodval(this->get); DenseVector<RealD> goodval(this->get);
#ifndef USE_LAPACK
EigenSort(evals,evecs); EigenSort(evals,evecs);
#endif
int NM = Nm; int NM = Nm;
@@ -1080,10 +1179,10 @@ say con = 2
**/ **/
template<class T> template<class T>
static void Lock(DenseMatrix<T> &H, // Hess mtx static void Lock(DenseMatrix<T> &H, ///Hess mtx
DenseMatrix<T> &Q, // Lock Transform DenseMatrix<T> &Q, ///Lock Transform
T val, // value to be locked T val, ///value to be locked
int con, // number already locked int con, ///number already locked
RealD small, RealD small,
int dfg, int dfg,
bool herm) bool herm)

View File

@@ -36,7 +36,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <iomanip> #include <iomanip>
#include <complex> #include <complex>
#include <typeinfo> #include <typeinfo>
#include <Grid/Grid.h> #include <Grid.h>
/** Sign function **/ /** Sign function **/

View File

@@ -81,10 +81,12 @@ int main(int argc, char** argv) {
RealD M5 = 1.8; RealD M5 = 1.8;
std::vector < std::complex<double> > omegas; std::vector < std::complex<double> > omegas;
for(int i=0;i<Ls;i++){ for(int i=0;i<Ls;i++){
std::complex<double> temp (0.25+0.00*i, 0.0+0.00*i); double imag = 0.;
if (i==0) imag=1.;
if (i==Ls-1) imag=-1.;
std::complex<double> temp (0.25+0.01*i, imag*0.01);
omegas.push_back(temp); omegas.push_back(temp);
} }
// DomainWallFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5);
ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, omegas,1.,0.); ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, omegas,1.,0.);
LatticeFermion src_o(FrbGrid); LatticeFermion src_o(FrbGrid);