mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-14 09:45:36 +00:00
Works with CPS evolution
This commit is contained in:
parent
e9712bc7fb
commit
0d5af667d8
@ -39,6 +39,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/algorithms/approx/MultiShiftFunction.h>
|
||||
|
||||
#include <Grid/algorithms/iterative/ConjugateGradient.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradientShifted.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateResidual.h>
|
||||
#include <Grid/algorithms/iterative/NormalEquations.h>
|
||||
#include <Grid/algorithms/iterative/SchurRedBlack.h>
|
||||
|
@ -45,8 +45,6 @@ class ConjugateGradient : public OperatorFunction<Field> {
|
||||
// Defaults true.
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
|
||||
ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
|
||||
: Tolerance(tol),
|
||||
MaxIterations(maxit),
|
||||
@ -157,14 +155,13 @@ class ConjugateGradient : public OperatorFunction<Field> {
|
||||
std::cout << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||
IterationsToComplete = k;
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
|
||||
<< std::endl;
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -35,7 +35,6 @@ namespace Grid {
|
||||
class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
|
||||
public:
|
||||
RealD Tolerance;
|
||||
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
|
||||
Integer MaxInnerIterations;
|
||||
Integer MaxOuterIterations;
|
||||
GridBase* SinglePrecGrid; //Grid for single-precision fields
|
||||
@ -43,16 +42,12 @@ namespace Grid {
|
||||
LinearOperatorBase<FieldF> &Linop_f;
|
||||
LinearOperatorBase<FieldD> &Linop_d;
|
||||
|
||||
Integer TotalInnerIterations; //Number of inner CG iterations
|
||||
Integer TotalOuterIterations; //Number of restarts
|
||||
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
|
||||
|
||||
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
|
||||
LinearFunction<FieldF> *guesser;
|
||||
|
||||
MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) :
|
||||
Linop_f(_Linop_f), Linop_d(_Linop_d),
|
||||
Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
|
||||
Tolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
|
||||
OuterLoopNormMult(100.), guesser(NULL){ };
|
||||
|
||||
void useGuesser(LinearFunction<FieldF> &g){
|
||||
@ -60,8 +55,9 @@ namespace Grid {
|
||||
}
|
||||
|
||||
void operator() (const FieldD &src_d_in, FieldD &sol_d){
|
||||
TotalInnerIterations = 0;
|
||||
|
||||
(*this)(src_d_in,sol_d,NULL);
|
||||
}
|
||||
void operator() (const FieldD &src_d_in, FieldD &sol_d, RealD *shift){
|
||||
GridStopWatch TotalTimer;
|
||||
TotalTimer.Start();
|
||||
|
||||
@ -81,7 +77,7 @@ namespace Grid {
|
||||
FieldD src_d(DoublePrecGrid);
|
||||
src_d = src_d_in; //source for next inner iteration, computed from residual during operation
|
||||
|
||||
RealD inner_tol = InnerTolerance;
|
||||
RealD inner_tol = Tolerance;
|
||||
|
||||
FieldF src_f(SinglePrecGrid);
|
||||
src_f.checkerboard = cb;
|
||||
@ -89,18 +85,17 @@ namespace Grid {
|
||||
FieldF sol_f(SinglePrecGrid);
|
||||
sol_f.checkerboard = cb;
|
||||
|
||||
ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations);
|
||||
ConjugateGradientShifted<FieldF> CG_f(inner_tol, MaxInnerIterations);
|
||||
CG_f.ErrorOnNoConverge = false;
|
||||
|
||||
GridStopWatch InnerCGtimer;
|
||||
|
||||
GridStopWatch PrecChangeTimer;
|
||||
|
||||
Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count
|
||||
|
||||
for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
|
||||
for(Integer outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
|
||||
//Compute double precision rsd and also new RHS vector.
|
||||
Linop_d.HermOp(sol_d, tmp_d);
|
||||
if(shift) axpy(tmp_d,*shift,sol_d,tmp_d);
|
||||
RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector
|
||||
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl;
|
||||
@ -124,9 +119,8 @@ namespace Grid {
|
||||
//Inner CG
|
||||
CG_f.Tolerance = inner_tol;
|
||||
InnerCGtimer.Start();
|
||||
CG_f(Linop_f, src_f, sol_f);
|
||||
CG_f(Linop_f, src_f, sol_f,shift);
|
||||
InnerCGtimer.Stop();
|
||||
TotalInnerIterations += CG_f.IterationsToComplete;
|
||||
|
||||
//Convert sol back to double and add to double prec solution
|
||||
PrecChangeTimer.Start();
|
||||
@ -139,13 +133,11 @@ namespace Grid {
|
||||
//Final trial CG
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl;
|
||||
|
||||
ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations);
|
||||
CG_d(Linop_d, src_d_in, sol_d);
|
||||
TotalFinalStepIterations = CG_d.IterationsToComplete;
|
||||
ConjugateGradientShifted<FieldD> CG_d(Tolerance, MaxInnerIterations);
|
||||
CG_d(Linop_d, src_d_in, sol_d,shift);
|
||||
|
||||
TotalTimer.Stop();
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl;
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total time " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -45,6 +45,7 @@ public:
|
||||
Integer MaxIterations;
|
||||
int verbose;
|
||||
MultiShiftFunction shifts;
|
||||
int iter;
|
||||
|
||||
ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) :
|
||||
MaxIterations(maxit),
|
||||
@ -60,6 +61,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, Field &psi)
|
||||
std::vector<Field> results(nshift,grid);
|
||||
(*this)(Linop,src,results,psi);
|
||||
}
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector<Field> &results, Field &psi)
|
||||
{
|
||||
int nshift = shifts.order;
|
||||
@ -105,11 +107,12 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
||||
RealD a,b,c,d;
|
||||
RealD cp,bp,qq; //prev
|
||||
|
||||
int cb=src.checkerboard;
|
||||
// Matrix mult fields
|
||||
Field r(grid);
|
||||
Field p(grid);
|
||||
Field p(grid); p.checkerboard = src.checkerboard;
|
||||
Field tmp(grid);
|
||||
Field mmp(grid);
|
||||
Field mmp(grid);mmp.checkerboard = src.checkerboard;
|
||||
|
||||
// Check lightest mass
|
||||
for(int s=0;s<nshift;s++){
|
||||
@ -132,6 +135,9 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
||||
p=src;
|
||||
|
||||
//MdagM+m[0]
|
||||
std::cout << "p.checkerboard " << p.checkerboard
|
||||
<< "mmp.checkerboard " << mmp.checkerboard << std::endl;
|
||||
|
||||
Linop.HermOpAndNorm(p,mmp,d,qq);
|
||||
axpy(mmp,mass[0],p,mmp);
|
||||
RealD rn = norm2(p);
|
||||
@ -269,6 +275,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
|
||||
RealD cn = norm2(src);
|
||||
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
|
||||
}
|
||||
iter = k;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
404
lib/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h
Normal file
404
lib/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h
Normal file
@ -0,0 +1,404 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Chulwoo Jung <chulwoo@quark.phy.bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END/ LEGAL */
|
||||
#ifndef GRID_CONJUGATE_GRADIENT_MULTI_MIXED_PREC_H
|
||||
#define GRID_CONJUGATE_GRADIENT_MULTI_MIXED_PREC_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
//Mixed precision restarted defect correction CG
|
||||
template<class FieldD,class FieldF
|
||||
//, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0
|
||||
//, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0
|
||||
>
|
||||
class MixedPrecisionConjugateGradientMultiShift : public LinearFunction<FieldD> {
|
||||
public:
|
||||
// RealD Tolerance;
|
||||
Integer MaxInnerIterations;
|
||||
Integer MaxOuterIterations;
|
||||
GridBase* SinglePrecGrid; //Grid for single-precision fields
|
||||
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
|
||||
LinearOperatorBase<FieldF> &Linop_f;
|
||||
LinearOperatorBase<FieldD> &Linop_d;
|
||||
MultiShiftFunction shifts;
|
||||
Integer iter;
|
||||
|
||||
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
|
||||
// LinearFunction<FieldF> *guesser;
|
||||
|
||||
MixedPrecisionConjugateGradientMultiShift(GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d,
|
||||
Integer maxinnerit, MultiShiftFunction &_shifts ) :
|
||||
Linop_f(_Linop_f), Linop_d(_Linop_d),
|
||||
MaxInnerIterations(maxinnerit), SinglePrecGrid(_sp_grid),
|
||||
OuterLoopNormMult(100.), shifts(_shifts) {};
|
||||
|
||||
|
||||
void operator() (const FieldD &src_d_in, FieldD &sol_d){
|
||||
assert(0); // not yet implemented
|
||||
}
|
||||
void operator() (const FieldD &src_d_in, std::vector<FieldD> &sol_d){
|
||||
GridStopWatch TotalTimer;
|
||||
TotalTimer.Start();
|
||||
|
||||
int cb = src_d_in.checkerboard;
|
||||
|
||||
int nshift = shifts.order;
|
||||
assert(nshift == sol_d.size());
|
||||
for(int i=0;i<nshift;i++) sol_d[i].checkerboard = cb;
|
||||
|
||||
RealD src_norm = norm2(src_d_in);
|
||||
// RealD stop = src_norm * Tolerance*Tolerance;
|
||||
|
||||
GridBase* DoublePrecGrid = src_d_in._grid;
|
||||
FieldD tmp_d(DoublePrecGrid); tmp_d.checkerboard = cb;
|
||||
|
||||
FieldD tmp2_d(DoublePrecGrid); tmp2_d.checkerboard = cb;
|
||||
|
||||
FieldD src_d(DoublePrecGrid);
|
||||
src_d = src_d_in; //source for next inner iteration, computed from residual during operation
|
||||
|
||||
// RealD inner_tol = Tolerance;
|
||||
FieldD psi_d(DoublePrecGrid);psi_d.checkerboard = cb;
|
||||
|
||||
FieldF src_f(SinglePrecGrid);
|
||||
src_f.checkerboard = cb;
|
||||
|
||||
std::vector<FieldF> sol_f(nshift,SinglePrecGrid);
|
||||
for(int i=0;i<nshift;i++) sol_f[i].checkerboard = cb;
|
||||
|
||||
// ConjugateGradientShifted<FieldF> CG_f(inner_tol, MaxInnerIterations);
|
||||
ConjugateGradientMultiShift<FieldF> MSCG(MaxInnerIterations,shifts);
|
||||
// CG_f.ErrorOnNoConverge = false;
|
||||
|
||||
GridStopWatch InnerCGtimer;
|
||||
|
||||
GridStopWatch PrecChangeTimer;
|
||||
|
||||
{
|
||||
// std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl;
|
||||
|
||||
// if(norm < OuterLoopNormMult * stop){
|
||||
// std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration converged on iteration " <<outer_iter <<std::endl;
|
||||
// break;
|
||||
// }
|
||||
// while(norm * inner_tol * inner_tol < stop) inner_tol *= 2; // inner_tol = sqrt(stop/norm) ??
|
||||
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(src_f, src_d);
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
// zeroit(sol_f);
|
||||
|
||||
|
||||
//Inner CG
|
||||
InnerCGtimer.Start();
|
||||
int if_relup = 0;
|
||||
#if 0
|
||||
MSCG(Linop_f,src_f,sol_f);
|
||||
#else
|
||||
{
|
||||
|
||||
GridBase *grid = SinglePrecGrid;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Convenience references to the info stored in "MultiShiftFunction"
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
int nshift = shifts.order;
|
||||
|
||||
|
||||
std::vector<RealD> &mass(shifts.poles); // Make references to array in "shifts"
|
||||
std::vector<RealD> &mresidual(shifts.tolerances);
|
||||
std::vector<RealD> alpha(nshift,1.);
|
||||
std::vector<FieldF> ps(nshift,grid);// Search directions
|
||||
|
||||
assert(sol_f.size()==nshift);
|
||||
assert(mass.size()==nshift);
|
||||
assert(mresidual.size()==nshift);
|
||||
|
||||
// dynamic sized arrays on stack; 2d is a pain with vector
|
||||
RealD bs[nshift];
|
||||
RealD rsq[nshift];
|
||||
RealD z[nshift][2];
|
||||
int converged[nshift];
|
||||
|
||||
const int primary =0;
|
||||
|
||||
//Primary shift fields CG iteration
|
||||
RealD a,b,c,d;
|
||||
RealD cp,bp,qq; //prev
|
||||
|
||||
int cb=src_f.checkerboard;
|
||||
// Matrix mult fields
|
||||
FieldF r(grid); r.checkerboard = src_f.checkerboard;
|
||||
FieldF p(grid); p.checkerboard = src_f.checkerboard;
|
||||
FieldF tmp(grid); tmp.checkerboard = src_f.checkerboard;
|
||||
FieldF mmp(grid);mmp.checkerboard = src_f.checkerboard;
|
||||
FieldF psi(grid);psi.checkerboard = src_f.checkerboard;
|
||||
std::cout.precision(12);
|
||||
std::cout<<GridLogMessage<<"norm2(psi_d)= "<<norm2(psi_d)<<std::endl;
|
||||
std::cout<<GridLogMessage<<"norm2(psi)= "<<norm2(psi)<<std::endl;
|
||||
|
||||
|
||||
// Check lightest mass
|
||||
for(int s=0;s<nshift;s++){
|
||||
assert( mass[s]>= mass[primary] );
|
||||
converged[s]=0;
|
||||
}
|
||||
|
||||
// Wire guess to zero
|
||||
// Residuals "r" are src
|
||||
// First search direction "p" is also src
|
||||
cp = norm2(src_f);
|
||||
Real c_relup = cp;
|
||||
for(int s=0;s<nshift;s++){
|
||||
rsq[s] = cp * mresidual[s] * mresidual[s];
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradientMultiShift: shift "<<s
|
||||
<<" target resid "<<rsq[s]<<std::endl;
|
||||
ps[s] = src_f;
|
||||
}
|
||||
// r and p for primary
|
||||
r=src_f;
|
||||
p=src_f;
|
||||
|
||||
//MdagM+m[0]
|
||||
std::cout << "p.checkerboard " << p.checkerboard
|
||||
<< "mmp.checkerboard " << mmp.checkerboard << std::endl;
|
||||
|
||||
Linop_f.HermOpAndNorm(p,mmp,d,qq);
|
||||
axpy(mmp,mass[0],p,mmp);
|
||||
RealD rn = norm2(p);
|
||||
d += rn*mass[0];
|
||||
|
||||
// have verified that inner product of
|
||||
// p and mmp is equal to d after this since
|
||||
// the d computation is tricky
|
||||
// qq = real(innerProduct(p,mmp));
|
||||
// std::cout<<GridLogMessage << "debug equal ? qq "<<qq<<" d "<< d<<std::endl;
|
||||
|
||||
b = -cp /d;
|
||||
|
||||
// Set up the various shift variables
|
||||
int iz=0;
|
||||
z[0][1-iz] = 1.0;
|
||||
z[0][iz] = 1.0;
|
||||
bs[0] = b;
|
||||
for(int s=1;s<nshift;s++){
|
||||
z[s][1-iz] = 1.0;
|
||||
z[s][iz] = 1.0/( 1.0 - b*(mass[s]-mass[0]));
|
||||
bs[s] = b*z[s][iz];
|
||||
}
|
||||
|
||||
// r += b[0] A.p[0]
|
||||
// c= norm(r)
|
||||
c=axpy_norm(r,b,mmp,r);
|
||||
|
||||
axpby(psi,0.,-bs[0],src_f,src_f);
|
||||
for(int s=0;s<nshift;s++) {
|
||||
axpby(sol_f[s],0.,-bs[s]*alpha[s],src_f,src_f);
|
||||
}
|
||||
|
||||
|
||||
// Iteration loop
|
||||
int k;
|
||||
// inefficient zeroing, please replace!
|
||||
// RealD sol_norm = axpy_norm(sol_d[0],-1.,sol_d[0],sol_d[0]);
|
||||
zeroit(sol_d[0]);
|
||||
std::cout<<GridLogMessage<<"norm(sol_d[0])= "<<norm2(sol_d[0])<<std::endl;
|
||||
|
||||
|
||||
int all_converged = 1;
|
||||
RealD tmp1,tmp2;
|
||||
for (k=1;k<=MaxOuterIterations;k++){
|
||||
|
||||
a = c /cp;
|
||||
axpy(p,a,p,r);
|
||||
|
||||
// Note to self - direction ps is iterated seperately
|
||||
// for each shift. Does not appear to have any scope
|
||||
// for avoiding linear algebra in "single" case.
|
||||
//
|
||||
// However SAME r is used. Could load "r" and update
|
||||
// ALL ps[s]. 2/3 Bandwidth saving
|
||||
// New Kernel: Load r, vector of coeffs, vector of pointers ps
|
||||
for(int s=0;s<nshift;s++){
|
||||
if ( ! converged[s] ) {
|
||||
if (s==0){
|
||||
axpy(ps[s],a,ps[s],r);
|
||||
} else{
|
||||
RealD as =a *z[s][iz]*bs[s] /(z[s][1-iz]*b);
|
||||
axpby(ps[s],z[s][iz],as,r,ps[s]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cp=c;
|
||||
|
||||
Linop_f.HermOpAndNorm(p,mmp,d,qq);
|
||||
axpy(mmp,mass[0],p,mmp);
|
||||
RealD rn = norm2(p);
|
||||
d += rn*mass[0];
|
||||
|
||||
bp=b;
|
||||
b=-cp/d;
|
||||
|
||||
c=axpy_norm(r,b,mmp,r);
|
||||
|
||||
|
||||
// Toggle the recurrence history
|
||||
bs[0] = b;
|
||||
iz = 1-iz;
|
||||
for(int s=1;s<nshift;s++){
|
||||
if((!converged[s])){
|
||||
RealD z0 = z[s][1-iz];
|
||||
RealD z1 = z[s][iz];
|
||||
z[s][iz] = z0*z1*bp
|
||||
/ (b*a*(z1-z0) + z1*bp*(1- (mass[s]-mass[0])*b));
|
||||
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
|
||||
}
|
||||
}
|
||||
|
||||
axpy(psi,-bs[0],ps[0],psi);
|
||||
for(int s=0;s<nshift;s++){
|
||||
int ss = s;
|
||||
// Scope for optimisation here in case of "single".
|
||||
// Could load sol_f[0] and pull all ps[s] in.
|
||||
// if ( single ) ss=primary;
|
||||
// Bandwith saving in single case is Ls * 3 -> 2+Ls, so ~ 3x saving
|
||||
// Pipelined CG gain:
|
||||
//
|
||||
// New Kernel: Load r, vector of coeffs, vector of pointers ps
|
||||
// New Kernel: Load sol_f[0], vector of coeffs, vector of pointers ps
|
||||
// If can predict the coefficient bs then we can fuse these and avoid write reread cyce
|
||||
// on ps[s].
|
||||
// Before: 3 x npole + 3 x npole
|
||||
// After : 2 x npole (ps[s]) => 3x speed up of multishift CG.
|
||||
|
||||
if( (!converged[s]) ) {
|
||||
axpy(sol_f[ss],-bs[s]*alpha[s],ps[s],sol_f[ss]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (k%MaxInnerIterations==0){
|
||||
// if (c < 1e-4*c_relup){
|
||||
RealD c_f=c;
|
||||
precisionChange(tmp_d,psi);
|
||||
RealD sol_norm =axpy_norm (psi_d,1.,tmp_d,psi_d);
|
||||
tmp1 = norm2(psi);
|
||||
zeroit(psi);
|
||||
tmp2 = norm2(psi);
|
||||
std::cout<<GridLogMessage<<"k= "<<k<<" norm2(sol)= "<<sol_norm<<" "<<tmp1<<" "<<tmp2<<std::endl;
|
||||
// precisionChange(sol_d[0],sol_f[0]);
|
||||
Linop_d.HermOpAndNorm(psi_d,tmp_d,tmp1,tmp2);
|
||||
axpy(tmp2_d,mass[0],psi_d,tmp_d);
|
||||
axpy(tmp_d,-1.,tmp2_d,src_d);
|
||||
precisionChange(r,tmp_d);
|
||||
c_relup = norm2(r);
|
||||
std::cout<<GridLogMessage<<"k= "<<k<<" norm2(r)= "<<c<<" "<<c_relup<<" "<<c_f<<std::endl;
|
||||
if_relup=1;
|
||||
}
|
||||
|
||||
// Convergence checks
|
||||
all_converged=1;
|
||||
for(int s=0;s<nshift;s++){
|
||||
|
||||
if ( (!converged[s]) ){
|
||||
|
||||
RealD css = c * z[s][iz]* z[s][iz];
|
||||
|
||||
if(css<rsq[s]){
|
||||
if ( ! converged[s] )
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShift k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
|
||||
converged[s]=1;
|
||||
} else {
|
||||
if (k%MaxInnerIterations==0)
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShift k="<<k<<" Shift "<<s<<" has not converged "<<css<<"<"<<rsq[s]<<std::endl;
|
||||
all_converged=0;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
if ( all_converged ){
|
||||
std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
|
||||
#else
|
||||
if ( converged[0] ){
|
||||
std::cout<<GridLogMessage<< "CGMultiShift: Shift 0 have converged iteration, terminating "<<k<<std::endl;
|
||||
#endif
|
||||
|
||||
#if 1
|
||||
for(int s=1; s < nshift; s++) {
|
||||
Linop_f.HermOpAndNorm(sol_f[s],mmp,d,qq);
|
||||
axpy(tmp,mass[s],sol_f[s],mmp);
|
||||
axpy(r,-alpha[s],src_f,tmp);
|
||||
RealD rn = norm2(r);
|
||||
RealD cn = norm2(src_f);
|
||||
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
|
||||
}
|
||||
#endif
|
||||
iter = k;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// ugly hack
|
||||
if ( !all_converged )
|
||||
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
|
||||
// assert(0);
|
||||
}
|
||||
|
||||
#endif
|
||||
InnerCGtimer.Stop();
|
||||
|
||||
//Convert sol back to double and add to double prec solution
|
||||
PrecChangeTimer.Start();
|
||||
sol_d[0]=psi_d;
|
||||
for(int i=1;i<nshift;i++)precisionChange(sol_d[i], sol_f[i]);
|
||||
std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl;
|
||||
// Check answers
|
||||
for(int s=0; s < nshift; s++) {
|
||||
RealD tmp1,tmp2;
|
||||
Linop_d.HermOpAndNorm(sol_d[s],tmp_d,tmp1,tmp2);
|
||||
axpy(tmp2_d,shifts.poles[s],sol_d[s],tmp_d);
|
||||
axpy(tmp_d,-1.,src_d,tmp2_d);
|
||||
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(norm2(tmp_d)/norm2(src_d))<<std::endl;
|
||||
}
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
}
|
||||
|
||||
//Final trial CG
|
||||
// std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl;
|
||||
|
||||
TotalTimer.Stop();
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
168
lib/algorithms/iterative/ConjugateGradientShifted.h
Normal file
168
lib/algorithms/iterative/ConjugateGradientShifted.h
Normal file
@ -0,0 +1,168 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateGradient.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_GRADIENT_SHIFTED_H
|
||||
#define GRID_CONJUGATE_GRADIENT_SHIFTED_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for iterative processes based on operators
|
||||
// single input vec, single output vec.
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
template<class Field>
|
||||
class ConjugateGradientShifted : public OperatorFunction<Field> {
|
||||
public:
|
||||
bool ErrorOnNoConverge; //throw an assert when the CG fails to converge. Defaults true.
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
ConjugateGradientShifted(RealD tol,Integer maxit, bool err_on_no_conv = true) : Tolerance(tol), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv) {
|
||||
};
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi ){
|
||||
(*this)(Linop,src,psi,NULL);
|
||||
}
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi, RealD *shift){
|
||||
|
||||
psi.checkerboard = src.checkerboard;
|
||||
conformable(psi,src);
|
||||
|
||||
RealD cp,c,a,d,b,ssq,qq,b_pred;
|
||||
|
||||
Field p(src);
|
||||
Field mmp(src);
|
||||
Field r(src);
|
||||
|
||||
//Initial residual computation & set up
|
||||
RealD guess = norm2(psi);
|
||||
assert(std::isnan(guess)==0);
|
||||
|
||||
Linop.HermOpAndNorm(psi,mmp,d,b);
|
||||
if(shift) axpy(mmp,*shift,psi,mmp);
|
||||
RealD rn = norm2(psi);
|
||||
if(shift) d += rn*(*shift);
|
||||
RealD d2 = real(innerProduct(psi,mmp));
|
||||
b= norm2(mmp);
|
||||
RealD src_norm=norm2(src);
|
||||
r= src-mmp;
|
||||
p= r;
|
||||
|
||||
a =norm2(p);
|
||||
cp =a;
|
||||
ssq=norm2(src);
|
||||
|
||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
|
||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: src "<<ssq <<std::endl;
|
||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mp "<<d <<std::endl;
|
||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mmp "<<b <<std::endl;
|
||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: cp,r "<<cp <<std::endl;
|
||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: p "<<a <<std::endl;
|
||||
|
||||
RealD rsq = Tolerance* Tolerance*ssq;
|
||||
|
||||
//Check if guess is really REALLY good :)
|
||||
if ( cp <= rsq ) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" target "<<rsq<<std::endl;
|
||||
|
||||
GridStopWatch LinalgTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
int k;
|
||||
for (k=1;k<=MaxIterations;k++){
|
||||
|
||||
c=cp;
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop.HermOpAndNorm(p,mmp,d,qq);
|
||||
MatrixTimer.Stop();
|
||||
LinalgTimer.Start();
|
||||
if(shift) axpy(mmp,*shift,p,mmp);
|
||||
RealD rn = norm2(p);
|
||||
if(shift) d += rn*(*shift);
|
||||
RealD d2 = real(innerProduct(p,mmp));
|
||||
qq = norm2(mmp);
|
||||
if (k%10==1) std::cout<< std::setprecision(4)<< "d: "<<d<<" d2= "<<d2<<std::endl;
|
||||
|
||||
// RealD qqck = norm2(mmp);
|
||||
// ComplexD dck = innerProduct(p,mmp);
|
||||
|
||||
a = c/d;
|
||||
b_pred = a*(a*qq-d)/c;
|
||||
|
||||
cp = axpy_norm(r,-a,mmp,r);
|
||||
b = cp/c;
|
||||
if (k%10==1) std::cout<< std::setprecision(4)<<"k= "<<k<<" src: "<<src_norm<<" r= "<<cp<<std::endl;
|
||||
|
||||
// Fuse these loops ; should be really easy
|
||||
psi= a*p+psi;
|
||||
p = p*b+r;
|
||||
|
||||
LinalgTimer.Stop();
|
||||
std::cout<<GridLogIterative<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target "<< rsq<<std::endl;
|
||||
|
||||
// Stopping condition
|
||||
if ( cp <= rsq ) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
Linop.HermOpAndNorm(psi,mmp,d,qq);
|
||||
if(shift) mmp = mmp + (*shift) * psi;
|
||||
p=mmp-src;
|
||||
|
||||
RealD mmpnorm = sqrt(norm2(mmp));
|
||||
RealD psinorm = sqrt(norm2(psi));
|
||||
RealD srcnorm = sqrt(norm2(src));
|
||||
RealD resnorm = sqrt(norm2(p));
|
||||
RealD true_residual = resnorm/srcnorm;
|
||||
|
||||
std::cout<<GridLogMessage<<"ConjugateGradient: Converged on iteration " <<k
|
||||
<<" computed residual "<<sqrt(cp/ssq)
|
||||
<<" true residual " <<true_residual
|
||||
<<" target "<<Tolerance<<std::endl;
|
||||
std::cout<<GridLogMessage<<"Time elapsed: Total "<< SolverTimer.Elapsed() << " Matrix "<<MatrixTimer.Elapsed() << " Linalg "<<LinalgTimer.Elapsed();
|
||||
std::cout<<std::endl;
|
||||
|
||||
if(ErrorOnNoConverge)
|
||||
assert(true_residual/Tolerance < 1000.0);
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
std::cout<<GridLogMessage<<"ConjugateGradient did NOT converge"<<std::endl;
|
||||
// assert(0);
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
@ -8,6 +8,7 @@
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Chulwoo Jung <chulwoo@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -36,7 +36,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include <iomanip>
|
||||
#include <complex>
|
||||
#include <typeinfo>
|
||||
#include <Grid/Grid.h>
|
||||
#include <Grid.h>
|
||||
|
||||
|
||||
/** Sign function **/
|
||||
|
Loading…
Reference in New Issue
Block a user