mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-27 10:09:33 +00:00 
			
		
		
		
	Compare commits
	
		
			3 Commits
		
	
	
		
			25f71913b7
			...
			feature/mi
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 1e3fb32572 | ||
|  | 0d5af667d8 | ||
|  | e9712bc7fb | 
| @@ -39,6 +39,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #include <Grid/algorithms/approx/MultiShiftFunction.h> | ||||
|  | ||||
| #include <Grid/algorithms/iterative/ConjugateGradient.h> | ||||
| #include <Grid/algorithms/iterative/ConjugateGradientShifted.h> | ||||
| #include <Grid/algorithms/iterative/ConjugateResidual.h> | ||||
| #include <Grid/algorithms/iterative/NormalEquations.h> | ||||
| #include <Grid/algorithms/iterative/SchurRedBlack.h> | ||||
|   | ||||
| @@ -45,8 +45,6 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
|                            // Defaults true. | ||||
|   RealD Tolerance; | ||||
|   Integer MaxIterations; | ||||
|   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion | ||||
|    | ||||
|   ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) | ||||
|       : Tolerance(tol), | ||||
|         MaxIterations(maxit), | ||||
| @@ -157,14 +155,13 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
|         std::cout << std::endl; | ||||
|  | ||||
|         if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); | ||||
| 	IterationsToComplete = k;	 | ||||
|  | ||||
|         return; | ||||
|       } | ||||
|     } | ||||
|     std::cout << GridLogMessage << "ConjugateGradient did NOT converge" | ||||
|               << std::endl; | ||||
|     if (ErrorOnNoConverge) assert(0); | ||||
|     IterationsToComplete = k; | ||||
|   } | ||||
| }; | ||||
| } | ||||
|   | ||||
| @@ -35,7 +35,6 @@ namespace Grid { | ||||
|   class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> { | ||||
|   public:                                                 | ||||
|     RealD   Tolerance; | ||||
|     RealD   InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed | ||||
|     Integer MaxInnerIterations; | ||||
|     Integer MaxOuterIterations; | ||||
|     GridBase* SinglePrecGrid; //Grid for single-precision fields | ||||
| @@ -43,16 +42,12 @@ namespace Grid { | ||||
|     LinearOperatorBase<FieldF> &Linop_f; | ||||
|     LinearOperatorBase<FieldD> &Linop_d; | ||||
|  | ||||
|     Integer TotalInnerIterations; //Number of inner CG iterations | ||||
|     Integer TotalOuterIterations; //Number of restarts | ||||
|     Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step | ||||
|  | ||||
|     //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess | ||||
|     LinearFunction<FieldF> *guesser; | ||||
|      | ||||
|     MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) : | ||||
|       Linop_f(_Linop_f), Linop_d(_Linop_d), | ||||
|       Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), | ||||
|       Tolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), | ||||
|       OuterLoopNormMult(100.), guesser(NULL){ }; | ||||
|  | ||||
|     void useGuesser(LinearFunction<FieldF> &g){ | ||||
| @@ -60,8 +55,9 @@ namespace Grid { | ||||
|     } | ||||
|    | ||||
|     void operator() (const FieldD &src_d_in, FieldD &sol_d){ | ||||
|       TotalInnerIterations = 0; | ||||
| 	 | ||||
| 	(*this)(src_d_in,sol_d,NULL); | ||||
|     } | ||||
|     void operator() (const FieldD &src_d_in, FieldD &sol_d, RealD *shift){ | ||||
|       GridStopWatch TotalTimer; | ||||
|       TotalTimer.Start(); | ||||
|      | ||||
| @@ -81,7 +77,7 @@ namespace Grid { | ||||
|       FieldD src_d(DoublePrecGrid); | ||||
|       src_d = src_d_in; //source for next inner iteration, computed from residual during operation | ||||
|      | ||||
|       RealD inner_tol = InnerTolerance; | ||||
|       RealD inner_tol = Tolerance; | ||||
|      | ||||
|       FieldF src_f(SinglePrecGrid); | ||||
|       src_f.checkerboard = cb; | ||||
| @@ -89,18 +85,17 @@ namespace Grid { | ||||
|       FieldF sol_f(SinglePrecGrid); | ||||
|       sol_f.checkerboard = cb; | ||||
|      | ||||
|       ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations); | ||||
|       ConjugateGradientShifted<FieldF> CG_f(inner_tol, MaxInnerIterations); | ||||
|       CG_f.ErrorOnNoConverge = false; | ||||
|  | ||||
|       GridStopWatch InnerCGtimer; | ||||
|  | ||||
|       GridStopWatch PrecChangeTimer; | ||||
|      | ||||
|       Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count | ||||
|        | ||||
|       for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){ | ||||
|       for(Integer outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){ | ||||
| 	//Compute double precision rsd and also new RHS vector. | ||||
| 	Linop_d.HermOp(sol_d, tmp_d); | ||||
| 	if(shift) axpy(tmp_d,*shift,sol_d,tmp_d); | ||||
| 	RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector | ||||
|        | ||||
| 	std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl; | ||||
| @@ -124,9 +119,8 @@ namespace Grid { | ||||
| 	//Inner CG | ||||
| 	CG_f.Tolerance = inner_tol; | ||||
| 	InnerCGtimer.Start(); | ||||
| 	CG_f(Linop_f, src_f, sol_f); | ||||
| 	CG_f(Linop_f, src_f, sol_f,shift); | ||||
| 	InnerCGtimer.Stop(); | ||||
| 	TotalInnerIterations += CG_f.IterationsToComplete; | ||||
|        | ||||
| 	//Convert sol back to double and add to double prec solution | ||||
| 	PrecChangeTimer.Start(); | ||||
| @@ -139,13 +133,11 @@ namespace Grid { | ||||
|       //Final trial CG | ||||
|       std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl; | ||||
|      | ||||
|       ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations); | ||||
|       CG_d(Linop_d, src_d_in, sol_d); | ||||
|       TotalFinalStepIterations = CG_d.IterationsToComplete; | ||||
|       ConjugateGradientShifted<FieldD> CG_d(Tolerance, MaxInnerIterations); | ||||
|       CG_d(Linop_d, src_d_in, sol_d,shift); | ||||
|  | ||||
|       TotalTimer.Stop(); | ||||
|       std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl; | ||||
|       std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total time " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl; | ||||
|       std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl; | ||||
|     } | ||||
|   }; | ||||
|  | ||||
|   | ||||
| @@ -45,6 +45,7 @@ public: | ||||
|     Integer MaxIterations; | ||||
|     int verbose; | ||||
|     MultiShiftFunction shifts; | ||||
|     int iter; | ||||
|  | ||||
|     ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) :  | ||||
| 	MaxIterations(maxit), | ||||
| @@ -60,6 +61,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) | ||||
|   std::vector<Field> results(nshift,grid); | ||||
|   (*this)(Linop,src,results,psi); | ||||
| } | ||||
|  | ||||
| void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector<Field> &results, Field &psi) | ||||
| { | ||||
|   int nshift = shifts.order; | ||||
| @@ -105,11 +107,12 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector | ||||
|   RealD a,b,c,d; | ||||
|   RealD cp,bp,qq; //prev | ||||
|    | ||||
|   int cb=src.checkerboard; | ||||
|   // Matrix mult fields | ||||
|   Field r(grid); | ||||
|   Field p(grid); | ||||
|   Field p(grid); p.checkerboard = src.checkerboard; | ||||
|   Field tmp(grid); | ||||
|   Field mmp(grid); | ||||
|   Field mmp(grid);mmp.checkerboard = src.checkerboard; | ||||
|    | ||||
|   // Check lightest mass | ||||
|   for(int s=0;s<nshift;s++){ | ||||
| @@ -132,6 +135,9 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector | ||||
|   p=src; | ||||
|    | ||||
|   //MdagM+m[0] | ||||
|   std::cout << "p.checkerboard " << p.checkerboard | ||||
|   << "mmp.checkerboard " << mmp.checkerboard << std::endl; | ||||
|  | ||||
|   Linop.HermOpAndNorm(p,mmp,d,qq); | ||||
|   axpy(mmp,mass[0],p,mmp); | ||||
|   RealD rn = norm2(p); | ||||
| @@ -269,6 +275,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector | ||||
| 	RealD cn = norm2(src); | ||||
| 	std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl; | ||||
|       } | ||||
|       iter = k; | ||||
|       return; | ||||
|     } | ||||
|   } | ||||
|   | ||||
							
								
								
									
										404
									
								
								lib/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										404
									
								
								lib/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,404 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Chulwoo Jung <chulwoo@quark.phy.bnl.gov> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END/ LEGAL */ | ||||
| #ifndef GRID_CONJUGATE_GRADIENT_MULTI_MIXED_PREC_H | ||||
| #define GRID_CONJUGATE_GRADIENT_MULTI_MIXED_PREC_H | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   //Mixed precision restarted defect correction CG | ||||
|   template<class FieldD,class FieldF | ||||
| //, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0 | ||||
| //, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0 | ||||
| >  | ||||
|   class MixedPrecisionConjugateGradientMultiShift : public LinearFunction<FieldD> { | ||||
|   public:                                                 | ||||
| //    RealD   Tolerance; | ||||
|     Integer MaxInnerIterations; | ||||
|     Integer MaxOuterIterations; | ||||
|     GridBase* SinglePrecGrid; //Grid for single-precision fields | ||||
|     RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance | ||||
|     LinearOperatorBase<FieldF> &Linop_f; | ||||
|     LinearOperatorBase<FieldD> &Linop_d; | ||||
|     MultiShiftFunction shifts; | ||||
|     Integer iter; | ||||
|  | ||||
|     //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess | ||||
| //    LinearFunction<FieldF> *guesser; | ||||
|      | ||||
|     MixedPrecisionConjugateGradientMultiShift(GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d,  | ||||
| Integer maxinnerit,	MultiShiftFunction &_shifts ) : | ||||
|       Linop_f(_Linop_f), Linop_d(_Linop_d), | ||||
|       MaxInnerIterations(maxinnerit), SinglePrecGrid(_sp_grid), | ||||
|       OuterLoopNormMult(100.), shifts(_shifts) {}; | ||||
|  | ||||
|    | ||||
|     void operator() (const FieldD &src_d_in, FieldD &sol_d){ | ||||
| 	assert(0); // not yet implemented | ||||
|     } | ||||
|     void operator() (const FieldD &src_d_in, std::vector<FieldD> &sol_d){ | ||||
|       GridStopWatch TotalTimer; | ||||
|       TotalTimer.Start(); | ||||
|      | ||||
|       int cb = src_d_in.checkerboard; | ||||
|  | ||||
|       int nshift = shifts.order; | ||||
|       assert(nshift == sol_d.size()); | ||||
|       for(int i=0;i<nshift;i++) sol_d[i].checkerboard = cb; | ||||
|      | ||||
|       RealD src_norm = norm2(src_d_in); | ||||
| //      RealD stop = src_norm * Tolerance*Tolerance; | ||||
|  | ||||
|       GridBase* DoublePrecGrid = src_d_in._grid; | ||||
|       FieldD tmp_d(DoublePrecGrid); tmp_d.checkerboard = cb; | ||||
|      | ||||
|       FieldD tmp2_d(DoublePrecGrid); tmp2_d.checkerboard = cb; | ||||
|      | ||||
|       FieldD src_d(DoublePrecGrid); | ||||
|       src_d = src_d_in; //source for next inner iteration, computed from residual during operation | ||||
|      | ||||
| //      RealD inner_tol = Tolerance; | ||||
|   	FieldD psi_d(DoublePrecGrid);psi_d.checkerboard = cb; | ||||
|      | ||||
|       FieldF src_f(SinglePrecGrid); | ||||
|       src_f.checkerboard = cb; | ||||
|      | ||||
|       std::vector<FieldF> sol_f(nshift,SinglePrecGrid); | ||||
|       for(int i=0;i<nshift;i++) sol_f[i].checkerboard = cb; | ||||
|      | ||||
| //      ConjugateGradientShifted<FieldF> CG_f(inner_tol, MaxInnerIterations); | ||||
|       ConjugateGradientMultiShift<FieldF> MSCG(MaxInnerIterations,shifts); | ||||
| //      CG_f.ErrorOnNoConverge = false; | ||||
|  | ||||
|       GridStopWatch InnerCGtimer; | ||||
|  | ||||
|       GridStopWatch PrecChangeTimer; | ||||
|      | ||||
| { | ||||
| //	std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl; | ||||
|  | ||||
| //	if(norm < OuterLoopNormMult * stop){ | ||||
| //	  std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration converged on iteration " <<outer_iter <<std::endl; | ||||
| //	  break; | ||||
| //	} | ||||
| //	while(norm * inner_tol * inner_tol < stop) inner_tol *= 2;  // inner_tol = sqrt(stop/norm) ?? | ||||
|  | ||||
| 	PrecChangeTimer.Start(); | ||||
| 	precisionChange(src_f, src_d); | ||||
| 	PrecChangeTimer.Stop(); | ||||
|        | ||||
| //	zeroit(sol_f); | ||||
|  | ||||
|  | ||||
| 	//Inner CG | ||||
| 	InnerCGtimer.Start(); | ||||
|   int if_relup = 0; | ||||
| #if 0 | ||||
|         MSCG(Linop_f,src_f,sol_f); | ||||
| #else | ||||
| { | ||||
|    | ||||
|   GridBase *grid = SinglePrecGrid; | ||||
|    | ||||
|   //////////////////////////////////////////////////////////////////////// | ||||
|   // Convenience references to the info stored in "MultiShiftFunction" | ||||
|   //////////////////////////////////////////////////////////////////////// | ||||
|   int nshift = shifts.order; | ||||
|  | ||||
|  | ||||
|   std::vector<RealD> &mass(shifts.poles); // Make references to array in "shifts" | ||||
|   std::vector<RealD> &mresidual(shifts.tolerances); | ||||
|   std::vector<RealD> alpha(nshift,1.); | ||||
|   std::vector<FieldF>   ps(nshift,grid);// Search directions | ||||
|  | ||||
|   assert(sol_f.size()==nshift); | ||||
|   assert(mass.size()==nshift); | ||||
|   assert(mresidual.size()==nshift); | ||||
|    | ||||
|   // dynamic sized arrays on stack; 2d is a pain with vector | ||||
|   RealD  bs[nshift]; | ||||
|   RealD  rsq[nshift]; | ||||
|   RealD  z[nshift][2]; | ||||
|   int     converged[nshift]; | ||||
|    | ||||
|   const int       primary =0; | ||||
|    | ||||
|   //Primary shift fields CG iteration | ||||
|   RealD a,b,c,d; | ||||
|   RealD cp,bp,qq; //prev | ||||
|    | ||||
|   int cb=src_f.checkerboard; | ||||
|   // Matrix mult fields | ||||
|   FieldF r(grid); r.checkerboard = src_f.checkerboard; | ||||
|   FieldF p(grid); p.checkerboard = src_f.checkerboard; | ||||
|   FieldF tmp(grid); tmp.checkerboard = src_f.checkerboard; | ||||
|   FieldF mmp(grid);mmp.checkerboard = src_f.checkerboard; | ||||
|   FieldF psi(grid);psi.checkerboard = src_f.checkerboard; | ||||
|     std::cout.precision(12); | ||||
|     std::cout<<GridLogMessage<<"norm2(psi_d)= "<<norm2(psi_d)<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"norm2(psi)= "<<norm2(psi)<<std::endl; | ||||
|    | ||||
|    | ||||
|   // Check lightest mass | ||||
|   for(int s=0;s<nshift;s++){ | ||||
|     assert( mass[s]>= mass[primary] ); | ||||
|     converged[s]=0; | ||||
|   } | ||||
|    | ||||
|   // Wire guess to zero | ||||
|   // Residuals "r" are src | ||||
|   // First search direction "p" is also src | ||||
|   cp = norm2(src_f); | ||||
|   Real c_relup = cp; | ||||
|   for(int s=0;s<nshift;s++){ | ||||
|     rsq[s] = cp * mresidual[s] * mresidual[s]; | ||||
|     std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradientMultiShift: shift "<<s | ||||
| 	     <<" target resid "<<rsq[s]<<std::endl; | ||||
|     ps[s] = src_f; | ||||
|   } | ||||
|   // r and p for primary | ||||
|   r=src_f; | ||||
|   p=src_f; | ||||
|    | ||||
|   //MdagM+m[0] | ||||
|   std::cout << "p.checkerboard " << p.checkerboard | ||||
|   << "mmp.checkerboard " << mmp.checkerboard << std::endl; | ||||
|  | ||||
|   Linop_f.HermOpAndNorm(p,mmp,d,qq); | ||||
|   axpy(mmp,mass[0],p,mmp); | ||||
|   RealD rn = norm2(p); | ||||
|   d += rn*mass[0]; | ||||
|    | ||||
|   // have verified that inner product of  | ||||
|   // p and mmp is equal to d after this since | ||||
|   // the d computation is tricky | ||||
|   //  qq = real(innerProduct(p,mmp)); | ||||
|   //  std::cout<<GridLogMessage << "debug equal ?  qq "<<qq<<" d "<< d<<std::endl; | ||||
|    | ||||
|   b = -cp /d; | ||||
|    | ||||
|   // Set up the various shift variables | ||||
|   int       iz=0; | ||||
|   z[0][1-iz] = 1.0; | ||||
|   z[0][iz]   = 1.0; | ||||
|   bs[0]      = b; | ||||
|   for(int s=1;s<nshift;s++){ | ||||
|     z[s][1-iz] = 1.0; | ||||
|     z[s][iz]   = 1.0/( 1.0 - b*(mass[s]-mass[0])); | ||||
|     bs[s]      = b*z[s][iz];  | ||||
|   } | ||||
|    | ||||
|   // r += b[0] A.p[0] | ||||
|   // c= norm(r) | ||||
|   c=axpy_norm(r,b,mmp,r); | ||||
|    | ||||
|  axpby(psi,0.,-bs[0],src_f,src_f); | ||||
|   for(int s=0;s<nshift;s++) { | ||||
|     axpby(sol_f[s],0.,-bs[s]*alpha[s],src_f,src_f); | ||||
|   } | ||||
|    | ||||
|    | ||||
|   // Iteration loop | ||||
|   int k; | ||||
|  // inefficient zeroing, please replace! | ||||
| //  RealD sol_norm = axpy_norm(sol_d[0],-1.,sol_d[0],sol_d[0]); | ||||
|   zeroit(sol_d[0]); | ||||
|   std::cout<<GridLogMessage<<"norm(sol_d[0])= "<<norm2(sol_d[0])<<std::endl; | ||||
|    | ||||
|  | ||||
|   int all_converged = 1; | ||||
| 	RealD tmp1,tmp2; | ||||
|   for (k=1;k<=MaxOuterIterations;k++){ | ||||
|      | ||||
|     a = c /cp; | ||||
|     axpy(p,a,p,r); | ||||
|      | ||||
|     // Note to self - direction ps is iterated seperately | ||||
|     // for each shift. Does not appear to have any scope | ||||
|     // for avoiding linear algebra in "single" case. | ||||
|     //  | ||||
|     // However SAME r is used. Could load "r" and update | ||||
|     // ALL ps[s]. 2/3 Bandwidth saving | ||||
|     // New Kernel: Load r, vector of coeffs, vector of pointers ps | ||||
|     for(int s=0;s<nshift;s++){ | ||||
|       if ( ! converged[s] ) {  | ||||
| 	if (s==0){ | ||||
| 	  axpy(ps[s],a,ps[s],r); | ||||
| 	} else{ | ||||
| 	  RealD as =a *z[s][iz]*bs[s] /(z[s][1-iz]*b); | ||||
| 	  axpby(ps[s],z[s][iz],as,r,ps[s]); | ||||
| 	} | ||||
|       } | ||||
|     } | ||||
|      | ||||
|     cp=c; | ||||
|      | ||||
|     Linop_f.HermOpAndNorm(p,mmp,d,qq); | ||||
|     axpy(mmp,mass[0],p,mmp); | ||||
|     RealD rn = norm2(p); | ||||
|     d += rn*mass[0]; | ||||
|      | ||||
|     bp=b; | ||||
|     b=-cp/d; | ||||
|      | ||||
|     c=axpy_norm(r,b,mmp,r); | ||||
|  | ||||
|  | ||||
|     // Toggle the recurrence history | ||||
|     bs[0] = b; | ||||
|     iz = 1-iz; | ||||
|     for(int s=1;s<nshift;s++){ | ||||
|       if((!converged[s])){ | ||||
| 	RealD z0 = z[s][1-iz]; | ||||
| 	RealD z1 = z[s][iz]; | ||||
| 	z[s][iz] = z0*z1*bp | ||||
| 	  / (b*a*(z1-z0) + z1*bp*(1- (mass[s]-mass[0])*b));  | ||||
| 	bs[s] = b*z[s][iz]/z0; // NB sign  rel to Mike | ||||
|       } | ||||
|     } | ||||
|      | ||||
|     axpy(psi,-bs[0],ps[0],psi); | ||||
|     for(int s=0;s<nshift;s++){ | ||||
|       int ss = s; | ||||
|       // Scope for optimisation here in case of "single". | ||||
|       // Could load sol_f[0] and pull all ps[s] in. | ||||
|       //      if ( single ) ss=primary; | ||||
|       // Bandwith saving in single case is Ls * 3 -> 2+Ls, so ~ 3x saving | ||||
|       // Pipelined CG gain: | ||||
|       // | ||||
|       // New Kernel: Load r, vector of coeffs, vector of pointers ps | ||||
|       // New Kernel: Load sol_f[0], vector of coeffs, vector of pointers ps | ||||
|       // If can predict the coefficient bs then we can fuse these and avoid write reread cyce | ||||
|       //  on ps[s]. | ||||
|       // Before:  3 x npole  + 3 x npole | ||||
|       // After :  2 x npole (ps[s])        => 3x speed up of multishift CG. | ||||
|        | ||||
|       if( (!converged[s]) ) {  | ||||
| 	axpy(sol_f[ss],-bs[s]*alpha[s],ps[s],sol_f[ss]); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|  | ||||
|     if (k%MaxInnerIterations==0){ | ||||
| //    if (c < 1e-4*c_relup){ | ||||
|        RealD c_f=c; | ||||
|        precisionChange(tmp_d,psi); | ||||
|        RealD sol_norm =axpy_norm (psi_d,1.,tmp_d,psi_d); | ||||
|        tmp1 = norm2(psi); | ||||
|        zeroit(psi); | ||||
|        tmp2 = norm2(psi); | ||||
|        std::cout<<GridLogMessage<<"k= "<<k<<" norm2(sol)= "<<sol_norm<<" "<<tmp1<<" "<<tmp2<<std::endl; | ||||
| //       precisionChange(sol_d[0],sol_f[0]); | ||||
|        Linop_d.HermOpAndNorm(psi_d,tmp_d,tmp1,tmp2); | ||||
|        axpy(tmp2_d,mass[0],psi_d,tmp_d); | ||||
|        axpy(tmp_d,-1.,tmp2_d,src_d); | ||||
|        precisionChange(r,tmp_d); | ||||
| 	c_relup = norm2(r); | ||||
|        std::cout<<GridLogMessage<<"k= "<<k<<" norm2(r)= "<<c<<" "<<c_relup<<" "<<c_f<<std::endl; | ||||
| 	if_relup=1; | ||||
|     } | ||||
|      | ||||
|     // Convergence checks | ||||
|   all_converged=1; | ||||
|     for(int s=0;s<nshift;s++){ | ||||
|        | ||||
|       if ( (!converged[s]) ){ | ||||
| 	 | ||||
| 	RealD css  = c * z[s][iz]* z[s][iz]; | ||||
| 	 | ||||
| 	if(css<rsq[s]){ | ||||
| 	  if ( ! converged[s] ) | ||||
| 	    std::cout<<GridLogMessage<<"ConjugateGradientMultiShift k="<<k<<" Shift "<<s<<" has converged"<<std::endl; | ||||
| 	      converged[s]=1; | ||||
| 	} else { | ||||
| 		if (k%MaxInnerIterations==0) | ||||
| 	    std::cout<<GridLogMessage<<"ConjugateGradientMultiShift k="<<k<<" Shift "<<s<<" has not converged "<<css<<"<"<<rsq[s]<<std::endl; | ||||
| 	  all_converged=0; | ||||
| 	} | ||||
|  | ||||
|       } | ||||
|     } | ||||
|      | ||||
| #if 0 | ||||
|     if ( all_converged ){ | ||||
|       std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl; | ||||
| #else | ||||
|     if ( converged[0] ){ | ||||
|       std::cout<<GridLogMessage<< "CGMultiShift: Shift 0 have converged iteration, terminating  "<<k<<std::endl; | ||||
| #endif | ||||
|        | ||||
| #if 1 | ||||
|       for(int s=1; s < nshift; s++) {  | ||||
| 	Linop_f.HermOpAndNorm(sol_f[s],mmp,d,qq); | ||||
| 	axpy(tmp,mass[s],sol_f[s],mmp); | ||||
| 	axpy(r,-alpha[s],src_f,tmp); | ||||
| 	RealD rn = norm2(r); | ||||
| 	RealD cn = norm2(src_f); | ||||
| 	std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl; | ||||
|       } | ||||
| #endif | ||||
|      iter = k; | ||||
|       break; | ||||
|     } | ||||
|   } | ||||
|   // ugly hack | ||||
|   if ( !all_converged ) | ||||
|   std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl; | ||||
| //  assert(0); | ||||
| } | ||||
| 	 | ||||
| #endif | ||||
| 	InnerCGtimer.Stop(); | ||||
|        | ||||
| 	//Convert sol back to double and add to double prec solution | ||||
| 	PrecChangeTimer.Start(); | ||||
| 	sol_d[0]=psi_d; | ||||
| 	for(int i=1;i<nshift;i++)precisionChange(sol_d[i], sol_f[i]); | ||||
|       std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl; | ||||
|       // Check answers  | ||||
|       for(int s=0; s < nshift; s++) {  | ||||
| 	RealD tmp1,tmp2; | ||||
|        Linop_d.HermOpAndNorm(sol_d[s],tmp_d,tmp1,tmp2); | ||||
|        axpy(tmp2_d,shifts.poles[s],sol_d[s],tmp_d); | ||||
|        axpy(tmp_d,-1.,src_d,tmp2_d); | ||||
| 	std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(norm2(tmp_d)/norm2(src_d))<<std::endl; | ||||
|       } | ||||
| 	PrecChangeTimer.Stop(); | ||||
|        | ||||
| } | ||||
|      | ||||
|       //Final trial CG | ||||
|  //     std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl; | ||||
|      | ||||
|       TotalTimer.Stop(); | ||||
|       std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl; | ||||
|     } | ||||
|   }; | ||||
|  | ||||
| } | ||||
|  | ||||
| #endif | ||||
							
								
								
									
										168
									
								
								lib/algorithms/iterative/ConjugateGradientShifted.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										168
									
								
								lib/algorithms/iterative/ConjugateGradientShifted.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,168 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/ConjugateGradient.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef GRID_CONJUGATE_GRADIENT_SHIFTED_H | ||||
| #define GRID_CONJUGATE_GRADIENT_SHIFTED_H | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     // Base classes for iterative processes based on operators | ||||
|     // single input vec, single output vec. | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|  | ||||
|   template<class Field>  | ||||
|     class ConjugateGradientShifted : public OperatorFunction<Field> { | ||||
| public:                                                 | ||||
|     bool ErrorOnNoConverge; //throw an assert when the CG fails to converge. Defaults true. | ||||
|     RealD   Tolerance; | ||||
|     Integer MaxIterations; | ||||
|     ConjugateGradientShifted(RealD tol,Integer maxit, bool err_on_no_conv = true) : Tolerance(tol), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv) {  | ||||
|     }; | ||||
|  | ||||
|     void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi ){ | ||||
| 	(*this)(Linop,src,psi,NULL); | ||||
|     } | ||||
|  | ||||
|     void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi, RealD *shift){ | ||||
|  | ||||
|       psi.checkerboard = src.checkerboard; | ||||
|       conformable(psi,src); | ||||
|  | ||||
|       RealD cp,c,a,d,b,ssq,qq,b_pred; | ||||
|        | ||||
|       Field   p(src); | ||||
|       Field mmp(src); | ||||
|       Field   r(src); | ||||
|        | ||||
|       //Initial residual computation & set up | ||||
|       RealD guess = norm2(psi); | ||||
|       assert(std::isnan(guess)==0); | ||||
|  | ||||
|       Linop.HermOpAndNorm(psi,mmp,d,b); | ||||
| 	if(shift) axpy(mmp,*shift,psi,mmp); | ||||
| 	RealD rn = norm2(psi); | ||||
| 	if(shift) d += rn*(*shift); | ||||
| 	RealD d2 = real(innerProduct(psi,mmp)); | ||||
| 	b= norm2(mmp); | ||||
|       RealD src_norm=norm2(src); | ||||
|       r= src-mmp; | ||||
|       p= r; | ||||
|        | ||||
|       a  =norm2(p); | ||||
|       cp =a; | ||||
|       ssq=norm2(src); | ||||
|  | ||||
|       std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl; | ||||
|       std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:   src "<<ssq  <<std::endl; | ||||
|       std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:    mp "<<d    <<std::endl; | ||||
|       std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:   mmp "<<b    <<std::endl; | ||||
|       std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:  cp,r "<<cp   <<std::endl; | ||||
|       std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:     p "<<a    <<std::endl; | ||||
|  | ||||
|       RealD rsq =  Tolerance* Tolerance*ssq; | ||||
|        | ||||
|       //Check if guess is really REALLY good :) | ||||
|       if ( cp <= rsq ) { | ||||
| 	return; | ||||
|       } | ||||
|        | ||||
|       std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" target "<<rsq<<std::endl; | ||||
|  | ||||
|       GridStopWatch LinalgTimer; | ||||
|       GridStopWatch MatrixTimer; | ||||
|       GridStopWatch SolverTimer; | ||||
|  | ||||
|       SolverTimer.Start(); | ||||
|       int k; | ||||
|       for (k=1;k<=MaxIterations;k++){ | ||||
| 	 | ||||
| 	c=cp; | ||||
|  | ||||
| 	MatrixTimer.Start(); | ||||
| 	Linop.HermOpAndNorm(p,mmp,d,qq); | ||||
| 	MatrixTimer.Stop(); | ||||
| 	LinalgTimer.Start(); | ||||
| 	if(shift) axpy(mmp,*shift,p,mmp); | ||||
| 	RealD rn = norm2(p); | ||||
| 	if(shift) d += rn*(*shift); | ||||
| 	RealD d2 = real(innerProduct(p,mmp)); | ||||
| 	qq = norm2(mmp); | ||||
|       if (k%10==1) std::cout<< std::setprecision(4)<< "d:  "<<d<<" d2= "<<d2<<std::endl; | ||||
|  | ||||
| 	//	RealD    qqck = norm2(mmp); | ||||
| 	//	ComplexD dck  = innerProduct(p,mmp); | ||||
|        | ||||
| 	a      = c/d; | ||||
| 	b_pred = a*(a*qq-d)/c; | ||||
|  | ||||
| 	cp = axpy_norm(r,-a,mmp,r); | ||||
| 	b = cp/c; | ||||
|       if (k%10==1) std::cout<< std::setprecision(4)<<"k= "<<k<<" src:  "<<src_norm<<" r= "<<cp<<std::endl; | ||||
| 	 | ||||
| 	// Fuse these loops ; should be really easy | ||||
| 	psi= a*p+psi; | ||||
| 	p  = p*b+r; | ||||
| 	   | ||||
| 	LinalgTimer.Stop(); | ||||
| 	std::cout<<GridLogIterative<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target "<< rsq<<std::endl; | ||||
| 	 | ||||
| 	// Stopping condition | ||||
| 	if ( cp <= rsq ) {  | ||||
| 	   | ||||
| 	  SolverTimer.Stop(); | ||||
| 	  Linop.HermOpAndNorm(psi,mmp,d,qq); | ||||
| 	  if(shift) mmp = mmp + (*shift) * psi; | ||||
| 	  p=mmp-src; | ||||
| 	   | ||||
| 	  RealD mmpnorm = sqrt(norm2(mmp)); | ||||
| 	  RealD psinorm = sqrt(norm2(psi)); | ||||
| 	  RealD srcnorm = sqrt(norm2(src)); | ||||
| 	  RealD resnorm = sqrt(norm2(p)); | ||||
| 	  RealD true_residual = resnorm/srcnorm; | ||||
|  | ||||
| 	  std::cout<<GridLogMessage<<"ConjugateGradient: Converged on iteration " <<k | ||||
| 		   <<" computed residual "<<sqrt(cp/ssq) | ||||
| 		   <<" true residual "    <<true_residual | ||||
| 		   <<" target "<<Tolerance<<std::endl; | ||||
| 	  std::cout<<GridLogMessage<<"Time elapsed: Total "<< SolverTimer.Elapsed() << " Matrix  "<<MatrixTimer.Elapsed() << " Linalg "<<LinalgTimer.Elapsed(); | ||||
| 	  std::cout<<std::endl; | ||||
| 	   | ||||
| 	if(ErrorOnNoConverge) | ||||
| 	  assert(true_residual/Tolerance < 1000.0); | ||||
|  | ||||
| 	  return; | ||||
| 	} | ||||
|       } | ||||
|       std::cout<<GridLogMessage<<"ConjugateGradient did NOT converge"<<std::endl; | ||||
| //      assert(0); | ||||
|     } | ||||
|   }; | ||||
| } | ||||
| #endif | ||||
| @@ -31,11 +31,16 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
| #include <string.h> //memset | ||||
| #ifdef USE_LAPACK | ||||
| #ifdef USE_MKL | ||||
| #include<mkl_lapack.h> | ||||
| #else | ||||
| void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, | ||||
|                    double *vl, double *vu, int *il, int *iu, double *abstol, | ||||
|                    int *m, double *w, double *z, int *ldz, int *isuppz, | ||||
|                    double *work, int *lwork, int *iwork, int *liwork, | ||||
|                    int *info); | ||||
| //#include <lapacke/lapacke.h> | ||||
| #endif | ||||
| #endif | ||||
| #include "DenseMatrix.h" | ||||
| #include "EigenSort.h" | ||||
| @@ -62,12 +67,13 @@ public: | ||||
|     int Np;      // Np -- Number of spare vecs in kryloc space | ||||
|     int Nm;      // Nm -- total number of vectors | ||||
|  | ||||
|  | ||||
|     RealD OrthoTime; | ||||
|  | ||||
|     RealD eresid; | ||||
|  | ||||
|     SortEigen<Field> _sort; | ||||
|  | ||||
| //    GridCartesian &_fgrid; | ||||
|  | ||||
|     LinearOperatorBase<Field> &_Linop; | ||||
|  | ||||
|     OperatorFunction<Field>   &_poly; | ||||
| @@ -124,23 +130,23 @@ public: | ||||
|  | ||||
|       GridBase *grid = evec[0]._grid; | ||||
|       Field w(grid); | ||||
|       std::cout << "RitzMatrix "<<std::endl; | ||||
|       std::cout<<GridLogMessage << "RitzMatrix "<<std::endl; | ||||
|       for(int i=0;i<k;i++){ | ||||
| 	_poly(_Linop,evec[i],w); | ||||
| 	std::cout << "["<<i<<"] "; | ||||
| 	std::cout<<GridLogMessage << "["<<i<<"] "; | ||||
| 	for(int j=0;j<k;j++){ | ||||
| 	  ComplexD in = innerProduct(evec[j],w); | ||||
| 	  if ( fabs((double)i-j)>1 ) {  | ||||
| 	    if (abs(in) >1.0e-9 )  {  | ||||
| 	      std::cout<<"oops"<<std::endl; | ||||
| 	      std::cout<<GridLogMessage<<"oops"<<std::endl; | ||||
| 	      abort(); | ||||
| 	    } else  | ||||
| 	      std::cout << " 0 "; | ||||
| 	      std::cout<<GridLogMessage << " 0 "; | ||||
| 	  } else {  | ||||
| 	    std::cout << " "<<in<<" "; | ||||
| 	    std::cout<<GridLogMessage << " "<<in<<" "; | ||||
| 	  } | ||||
| 	} | ||||
| 	std::cout << std::endl; | ||||
| 	std::cout<<GridLogMessage << std::endl; | ||||
|       } | ||||
|     } | ||||
|  | ||||
| @@ -174,10 +180,10 @@ public: | ||||
|       RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop | ||||
|                                  // 7. vk+1 := wk/βk+1 | ||||
|  | ||||
| //	std::cout << "alpha = " << zalph << " beta "<<beta<<std::endl; | ||||
| 	std::cout<<GridLogMessage << "alpha = " << zalph << " beta "<<beta<<std::endl; | ||||
|       const RealD tiny = 1.0e-20; | ||||
|       if ( beta < tiny ) {  | ||||
| 	std::cout << " beta is tiny "<<beta<<std::endl; | ||||
| 	std::cout<<GridLogMessage << " beta is tiny "<<beta<<std::endl; | ||||
|      } | ||||
|       lmd[k] = alph; | ||||
|       lme[k]  = beta; | ||||
| @@ -253,6 +259,7 @@ public: | ||||
|     } | ||||
|  | ||||
| #ifdef USE_LAPACK | ||||
| #define LAPACK_INT long long | ||||
|     void diagonalize_lapack(DenseVector<RealD>& lmd, | ||||
| 		     DenseVector<RealD>& lme,  | ||||
| 		     int N1, | ||||
| @@ -262,7 +269,7 @@ public: | ||||
|   const int size = Nm; | ||||
| //  tevals.resize(size); | ||||
| //  tevecs.resize(size); | ||||
|   int NN = N1; | ||||
|   LAPACK_INT NN = N1; | ||||
|   double evals_tmp[NN]; | ||||
|   double evec_tmp[NN][NN]; | ||||
|   memset(evec_tmp[0],0,sizeof(double)*NN*NN); | ||||
| @@ -276,19 +283,19 @@ public: | ||||
|         if (i==j) evals_tmp[i] = lmd[i]; | ||||
|         if (j==(i-1)) EE[j] = lme[j]; | ||||
|       } | ||||
|   int evals_found; | ||||
|   int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; | ||||
|   int liwork =  3+NN*10 ; | ||||
|   int iwork[liwork]; | ||||
|   LAPACK_INT evals_found; | ||||
|   LAPACK_INT lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; | ||||
|   LAPACK_INT liwork =  3+NN*10 ; | ||||
|   LAPACK_INT iwork[liwork]; | ||||
|   double work[lwork]; | ||||
|   int isuppz[2*NN]; | ||||
|   LAPACK_INT isuppz[2*NN]; | ||||
|   char jobz = 'V'; // calculate evals & evecs | ||||
|   char range = 'I'; // calculate all evals | ||||
|   //    char range = 'A'; // calculate all evals | ||||
|   char uplo = 'U'; // refer to upper half of original matrix | ||||
|   char compz = 'I'; // Compute eigenvectors of tridiagonal matrix | ||||
|   int ifail[NN]; | ||||
|   int info; | ||||
|   long long info; | ||||
| //  int total = QMP_get_number_of_nodes(); | ||||
| //  int node = QMP_get_node_number(); | ||||
| //  GridBase *grid = evec[0]._grid; | ||||
| @@ -296,14 +303,18 @@ public: | ||||
|   int node = grid->_processor; | ||||
|   int interval = (NN/total)+1; | ||||
|   double vl = 0.0, vu = 0.0; | ||||
|   int il = interval*node+1 , iu = interval*(node+1); | ||||
|   LAPACK_INT il = interval*node+1 , iu = interval*(node+1); | ||||
|   if (iu > NN)  iu=NN; | ||||
|   double tol = 0.0; | ||||
|     if (1) { | ||||
|       memset(evals_tmp,0,sizeof(double)*NN); | ||||
|       if ( il <= NN){ | ||||
|         printf("total=%d node=%d il=%d iu=%d\n",total,node,il,iu); | ||||
| #ifdef USE_MKL | ||||
|         dstegr(&jobz, &range, &NN, | ||||
| #else | ||||
|         LAPACK_dstegr(&jobz, &range, &NN, | ||||
| #endif | ||||
|             (double*)DD, (double*)EE, | ||||
|             &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' | ||||
|             &tol, // tolerance | ||||
| @@ -335,6 +346,7 @@ public: | ||||
|       lmd [NN-1-i]=evals_tmp[i]; | ||||
|   } | ||||
| } | ||||
| #undef LAPACK_INT  | ||||
| #endif | ||||
|  | ||||
|  | ||||
| @@ -365,12 +377,14 @@ public: | ||||
| //	diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid); | ||||
| #endif | ||||
|  | ||||
|       int Niter = 100*N1; | ||||
|       int Niter = 10000*N1; | ||||
|       int kmin = 1; | ||||
|       int kmax = N2; | ||||
|       // (this should be more sophisticated) | ||||
|  | ||||
|       for(int iter=0; iter<Niter; ++iter){ | ||||
|       for(int iter=0; ; ++iter){ | ||||
|       if ( (iter+1)%(100*N1)==0)  | ||||
|       std::cout<<GridLogMessage << "[QL method] Not converged - iteration "<<iter+1<<"\n"; | ||||
|  | ||||
| 	// determination of 2x2 leading submatrix | ||||
| 	RealD dsub = lmd[kmax-1]-lmd[kmax-2]; | ||||
| @@ -399,11 +413,11 @@ public: | ||||
|         _sort.push(lmd3,N2); | ||||
|         _sort.push(lmd2,N2); | ||||
|          for(int k=0; k<N2; ++k){ | ||||
| 	    if (fabs(lmd2[k] - lmd3[k]) >SMALL)  std::cout <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl; | ||||
| //	    if (fabs(lme2[k] - lme[k]) >SMALL)  std::cout <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl; | ||||
| 	    if (fabs(lmd2[k] - lmd3[k]) >SMALL)  std::cout<<GridLogMessage <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl; | ||||
| //	    if (fabs(lme2[k] - lme[k]) >SMALL)  std::cout<<GridLogMessage <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl; | ||||
| 	  } | ||||
|          for(int k=0; k<N1*N1; ++k){ | ||||
| //	    if (fabs(Qt2[k] - Qt[k]) >SMALL)  std::cout <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl; | ||||
| //	    if (fabs(Qt2[k] - Qt[k]) >SMALL)  std::cout<<GridLogMessage <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl; | ||||
| 	} | ||||
|     } | ||||
| #endif | ||||
| @@ -418,7 +432,7 @@ public: | ||||
| 	  } | ||||
| 	} | ||||
|       } | ||||
|       std::cout << "[QL method] Error - Too many iteration: "<<Niter<<"\n"; | ||||
|       std::cout<<GridLogMessage << "[QL method] Error - Too many iteration: "<<Niter<<"\n"; | ||||
|       abort(); | ||||
|     } | ||||
|  | ||||
| @@ -435,6 +449,7 @@ public: | ||||
| 		       DenseVector<Field>& evec, | ||||
| 		       int k) | ||||
|     { | ||||
|       double t0=-usecond()/1e6; | ||||
|       typedef typename Field::scalar_type MyComplex; | ||||
|       MyComplex ip; | ||||
|  | ||||
| @@ -453,6 +468,8 @@ public: | ||||
| 	w = w - ip * evec[j]; | ||||
|       } | ||||
|       normalise(w); | ||||
|       t0+=usecond()/1e6; | ||||
|       OrthoTime +=t0; | ||||
|     } | ||||
|  | ||||
|     void setUnit_Qt(int Nm, DenseVector<RealD> &Qt) { | ||||
| @@ -486,10 +503,10 @@ until convergence | ||||
| 	GridBase *grid = evec[0]._grid; | ||||
| 	assert(grid == src._grid); | ||||
|  | ||||
| 	std::cout << " -- Nk = " << Nk << " Np = "<< Np << std::endl; | ||||
| 	std::cout << " -- Nm = " << Nm << std::endl; | ||||
| 	std::cout << " -- size of eval   = " << eval.size() << std::endl; | ||||
| 	std::cout << " -- size of evec  = " << evec.size() << std::endl; | ||||
| 	std::cout<<GridLogMessage << " -- Nk = " << Nk << " Np = "<< Np << std::endl; | ||||
| 	std::cout<<GridLogMessage << " -- Nm = " << Nm << std::endl; | ||||
| 	std::cout<<GridLogMessage << " -- size of eval   = " << eval.size() << std::endl; | ||||
| 	std::cout<<GridLogMessage << " -- size of evec  = " << evec.size() << std::endl; | ||||
| 	 | ||||
| 	assert(Nm == evec.size() && Nm == eval.size()); | ||||
| 	 | ||||
| @@ -500,6 +517,7 @@ until convergence | ||||
| 	DenseVector<int>   Iconv(Nm); | ||||
|  | ||||
| 	DenseVector<Field>  B(Nm,grid); // waste of space replicating | ||||
| //	DenseVector<Field>  Btemp(Nm,grid); // waste of space replicating | ||||
| 	 | ||||
| 	Field f(grid); | ||||
| 	Field v(grid); | ||||
| @@ -515,35 +533,48 @@ until convergence | ||||
| 	// (uniform vector) Why not src?? | ||||
| 	//	evec[0] = 1.0; | ||||
| 	evec[0] = src; | ||||
| 	std:: cout <<"norm2(src)= " << norm2(src)<<std::endl; | ||||
| 	std:: cout<<GridLogMessage <<"norm2(src)= " << norm2(src)<<std::endl; | ||||
| // << src._grid  << std::endl; | ||||
| 	normalise(evec[0]); | ||||
| 	std:: cout <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl; | ||||
| 	std:: cout<<GridLogMessage <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl; | ||||
| // << evec[0]._grid << std::endl; | ||||
| 	 | ||||
| 	// Initial Nk steps | ||||
| 	OrthoTime=0.; | ||||
| 	double t0=usecond()/1e6; | ||||
| 	for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k); | ||||
| //	std:: cout <<"norm2(evec[1])= " << norm2(evec[1]) << std::endl; | ||||
| //	std:: cout <<"norm2(evec[2])= " << norm2(evec[2]) << std::endl; | ||||
| 	double t1=usecond()/1e6; | ||||
| 	std::cout<<GridLogMessage <<"IRL::Initial steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||
| 	std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl; | ||||
| //	std:: cout<<GridLogMessage <<"norm2(evec[1])= " << norm2(evec[1]) << std::endl; | ||||
| //	std:: cout<<GridLogMessage <<"norm2(evec[2])= " << norm2(evec[2]) << std::endl; | ||||
| 	RitzMatrix(evec,Nk); | ||||
| 	t1=usecond()/1e6; | ||||
| 	std::cout<<GridLogMessage <<"IRL::RitzMatrix: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||
| 	for(int k=0; k<Nk; ++k){ | ||||
| //	std:: cout <<"eval " << k << " " <<eval[k] << std::endl; | ||||
| //	std:: cout <<"lme " << k << " " << lme[k] << std::endl; | ||||
| //	std:: cout<<GridLogMessage <<"eval " << k << " " <<eval[k] << std::endl; | ||||
| //	std:: cout<<GridLogMessage <<"lme " << k << " " << lme[k] << std::endl; | ||||
| 	} | ||||
|  | ||||
| 	// Restarting loop begins | ||||
| 	for(int iter = 0; iter<Niter; ++iter){ | ||||
|  | ||||
| 	  std::cout<<"\n Restart iteration = "<< iter << std::endl; | ||||
| 	  std::cout<<GridLogMessage<<"\n Restart iteration = "<< iter << std::endl; | ||||
|  | ||||
| 	  //  | ||||
| 	  // Rudy does a sort first which looks very different. Getting fed up with sorting out the algo defs. | ||||
| 	  // We loop over  | ||||
| 	  // | ||||
| 	OrthoTime=0.; | ||||
| 	  for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k); | ||||
| 	t1=usecond()/1e6; | ||||
| 	std::cout<<GridLogMessage <<"IRL:: "<<Np <<" steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||
| 	std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl; | ||||
| 	  f *= lme[Nm-1]; | ||||
|  | ||||
| 	  RitzMatrix(evec,k2); | ||||
| 	t1=usecond()/1e6; | ||||
| 	std::cout<<GridLogMessage <<"IRL:: RitzMatrix: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||
| 	   | ||||
| 	  // getting eigenvalues | ||||
| 	  for(int k=0; k<Nm; ++k){ | ||||
| @@ -552,18 +583,27 @@ until convergence | ||||
| 	  } | ||||
| 	  setUnit_Qt(Nm,Qt); | ||||
| 	  diagonalize(eval2,lme2,Nm,Nm,Qt,grid); | ||||
| 	t1=usecond()/1e6; | ||||
| 	std::cout<<GridLogMessage <<"IRL:: diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||
|  | ||||
| 	  // sorting | ||||
| 	  _sort.push(eval2,Nm); | ||||
| 	t1=usecond()/1e6; | ||||
| 	std::cout<<GridLogMessage <<"IRL:: eval sorting: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||
| 	   | ||||
| 	  // Implicitly shifted QR transformations | ||||
| 	  setUnit_Qt(Nm,Qt); | ||||
| 	  for(int ip=0; ip<k2; ++ip){ | ||||
| 	std::cout<<GridLogMessage << "eval "<< ip << " "<< eval2[ip] << std::endl; | ||||
| 	} | ||||
| 	  for(int ip=k2; ip<Nm; ++ip){  | ||||
| 	std::cout << "qr_decomp "<< ip << " "<< eval2[ip] << std::endl; | ||||
| 	std::cout<<GridLogMessage << "qr_decomp "<< ip << " "<< eval2[ip] << std::endl; | ||||
| 	    qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm); | ||||
| 		 | ||||
| 	} | ||||
|      | ||||
| 	t1=usecond()/1e6; | ||||
| 	std::cout<<GridLogMessage <<"IRL::qr_decomp: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||
| if (0) {   | ||||
| 	  for(int i=0; i<(Nk+1); ++i) B[i] = 0.0; | ||||
| 	   | ||||
| 	  for(int j=k1-1; j<k2+1; ++j){ | ||||
| @@ -572,14 +612,38 @@ until convergence | ||||
| 	      B[j] += Qt[k+Nm*j] * evec[k]; | ||||
| 	    } | ||||
| 	  } | ||||
| 	  for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j]; | ||||
| 	t1=usecond()/1e6; | ||||
| 	std::cout<<GridLogMessage <<"IRL::QR Rotate: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||
| } | ||||
|  | ||||
| if (1) { | ||||
| 	for(int i=0; i<(Nk+1); ++i) { | ||||
| 		B[i] = 0.0; | ||||
| 	  	B[i].checkerboard = evec[0].checkerboard; | ||||
| 	} | ||||
|  | ||||
| 	int j_block = 24; int k_block=24; | ||||
| PARALLEL_FOR_LOOP | ||||
| 	for(int ss=0;ss < grid->oSites();ss++){ | ||||
| 	for(int jj=k1-1; jj<k2+1; jj += j_block) | ||||
| 	for(int kk=0; kk<Nm; kk += k_block) | ||||
| 	for(int j=jj; (j<(k2+1)) && j<(jj+j_block); ++j){ | ||||
| 	for(int k=kk; (k<Nm) && k<(kk+k_block) ; ++k){ | ||||
| 	    B[j]._odata[ss] +=Qt[k+Nm*j] * evec[k]._odata[ss];  | ||||
| 	} | ||||
| 	} | ||||
| 	} | ||||
| 	t1=usecond()/1e6; | ||||
| 	std::cout<<GridLogMessage <<"IRL::QR rotation: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||
| } | ||||
| 	for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j]; | ||||
|  | ||||
| 	  // Compressed vector f and beta(k2) | ||||
| 	  f *= Qt[Nm-1+Nm*(k2-1)]; | ||||
| 	  f += lme[k2-1] * evec[k2]; | ||||
| 	  beta_k = norm2(f); | ||||
| 	  beta_k = sqrt(beta_k); | ||||
| 	  std::cout<<" beta(k) = "<<beta_k<<std::endl; | ||||
| 	  std::cout<<GridLogMessage<<" beta(k) = "<<beta_k<<std::endl; | ||||
|  | ||||
| 	  RealD betar = 1.0/beta_k; | ||||
| 	  evec[k2] = betar * f; | ||||
| @@ -592,7 +656,10 @@ until convergence | ||||
| 	  } | ||||
| 	  setUnit_Qt(Nm,Qt); | ||||
| 	  diagonalize(eval2,lme2,Nk,Nm,Qt,grid); | ||||
| 	t1=usecond()/1e6; | ||||
| 	std::cout<<GridLogMessage <<"IRL::diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||
| 	   | ||||
| if (0) { | ||||
| 	  for(int k = 0; k<Nk; ++k) B[k]=0.0; | ||||
| 	   | ||||
| 	  for(int j = 0; j<Nk; ++j){ | ||||
| @@ -600,12 +667,34 @@ until convergence | ||||
| 	    B[j].checkerboard = evec[k].checkerboard; | ||||
| 	      B[j] += Qt[k+j*Nm] * evec[k]; | ||||
| 	    } | ||||
| //	    std::cout << "norm(B["<<j<<"])="<<norm2(B[j])<<std::endl; | ||||
| 	    std::cout<<GridLogMessage << "norm(B["<<j<<"])="<<norm2(B[j])<<std::endl; | ||||
| 	  } | ||||
| //	_sort.push(eval2,B,Nk); | ||||
| 	t1=usecond()/1e6; | ||||
| 	std::cout<<GridLogMessage <<"IRL::Convergence rotation: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||
| } | ||||
| if (1) { | ||||
| 	for(int i=0; i<(Nk+1); ++i) { | ||||
| 		B[i] = 0.0; | ||||
| 	  	B[i].checkerboard = evec[0].checkerboard; | ||||
| 	} | ||||
|  | ||||
| 	int j_block = 24; int k_block=24; | ||||
| PARALLEL_FOR_LOOP | ||||
| 	for(int ss=0;ss < grid->oSites();ss++){ | ||||
| 	for(int jj=0; jj<Nk; jj += j_block) | ||||
| 	for(int kk=0; kk<Nk; kk += k_block) | ||||
| 	for(int j=jj; (j<Nk) && j<(jj+j_block); ++j){ | ||||
| 	for(int k=kk; (k<Nk) && k<(kk+k_block) ; ++k){ | ||||
| 	    B[j]._odata[ss] +=Qt[k+Nm*j] * evec[k]._odata[ss];  | ||||
| 	} | ||||
| 	} | ||||
| 	} | ||||
| 	t1=usecond()/1e6; | ||||
| 	std::cout<<GridLogMessage <<"IRL::convergence rotation : "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||
| } | ||||
|  | ||||
| 	  Nconv = 0; | ||||
| 	  //	  std::cout << std::setiosflags(std::ios_base::scientific); | ||||
| 	  //	  std::cout<<GridLogMessage << std::setiosflags(std::ios_base::scientific); | ||||
| 	  for(int i=0; i<Nk; ++i){ | ||||
|  | ||||
| //	    _poly(_Linop,B[i],v); | ||||
| @@ -613,14 +702,16 @@ until convergence | ||||
| 	     | ||||
| 	    RealD vnum = real(innerProduct(B[i],v)); // HermOp. | ||||
| 	    RealD vden = norm2(B[i]); | ||||
| 	    RealD vv0 = norm2(v); | ||||
| 	    eval2[i] = vnum/vden; | ||||
| 	    v -= eval2[i]*B[i]; | ||||
| 	    RealD vv = norm2(v); | ||||
| 	     | ||||
| 	    std::cout.precision(13); | ||||
| 	    std::cout << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] "; | ||||
| 	    std::cout << "eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i]; | ||||
| 	    std::cout <<" |H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv<< std::endl; | ||||
| 	    std::cout<<GridLogMessage << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] "; | ||||
| 	    std::cout<<"eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i]; | ||||
| 	    std::cout<<"|H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv; | ||||
| 	    std::cout<<" "<< vnum/(sqrt(vden)*sqrt(vv0)) << std::endl; | ||||
| 	     | ||||
| 	// change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged | ||||
| 	    if((vv<eresid*eresid) && (i == Nconv) ){ | ||||
| @@ -629,17 +720,19 @@ until convergence | ||||
| 	    } | ||||
|  | ||||
| 	  }  // i-loop end | ||||
| 	  //	  std::cout << std::resetiosflags(std::ios_base::scientific); | ||||
| 	  //	  std::cout<<GridLogMessage << std::resetiosflags(std::ios_base::scientific); | ||||
| 	t1=usecond()/1e6; | ||||
| 	std::cout<<GridLogMessage <<"IRL::convergence testing: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||
|  | ||||
|  | ||||
| 	  std::cout<<" #modes converged: "<<Nconv<<std::endl; | ||||
| 	  std::cout<<GridLogMessage<<" #modes converged: "<<Nconv<<std::endl; | ||||
|  | ||||
| 	  if( Nconv>=Nstop ){ | ||||
| 	    goto converged; | ||||
| 	  } | ||||
| 	} // end of iter loop | ||||
| 	 | ||||
| 	std::cout<<"\n NOT converged.\n"; | ||||
| 	std::cout<<GridLogMessage<<"\n NOT converged.\n"; | ||||
| 	abort(); | ||||
| 	 | ||||
|       converged: | ||||
| @@ -652,10 +745,10 @@ until convergence | ||||
|        } | ||||
|       _sort.push(eval,evec,Nconv); | ||||
|  | ||||
|       std::cout << "\n Converged\n Summary :\n"; | ||||
|       std::cout << " -- Iterations  = "<< Nconv  << "\n"; | ||||
|       std::cout << " -- beta(k)     = "<< beta_k << "\n"; | ||||
|       std::cout << " -- Nconv       = "<< Nconv  << "\n"; | ||||
|       std::cout<<GridLogMessage << "\n Converged\n Summary :\n"; | ||||
|       std::cout<<GridLogMessage << " -- Iterations  = "<< Nconv  << "\n"; | ||||
|       std::cout<<GridLogMessage << " -- beta(k)     = "<< beta_k << "\n"; | ||||
|       std::cout<<GridLogMessage << " -- Nconv       = "<< Nconv  << "\n"; | ||||
|      } | ||||
|  | ||||
|     ///////////////////////////////////////////////// | ||||
| @@ -678,25 +771,25 @@ until convergence | ||||
| 	} | ||||
|       } | ||||
|  | ||||
|       std::cout<<"Lanczos_Factor start/end " <<start <<"/"<<end<<std::endl; | ||||
|       std::cout<<GridLogMessage<<"Lanczos_Factor start/end " <<start <<"/"<<end<<std::endl; | ||||
|  | ||||
|       // Starting from scratch, bq[0] contains a random vector and |bq[0]| = 1 | ||||
|       int first; | ||||
|       if(start == 0){ | ||||
|  | ||||
| 	std::cout << "start == 0\n"; //TESTING | ||||
| 	std::cout<<GridLogMessage << "start == 0\n"; //TESTING | ||||
|  | ||||
| 	_poly(_Linop,bq[0],bf); | ||||
|  | ||||
| 	alpha = real(innerProduct(bq[0],bf));//alpha =  bq[0]^dag A bq[0] | ||||
|  | ||||
| 	std::cout << "alpha = " << alpha << std::endl; | ||||
| 	std::cout<<GridLogMessage << "alpha = " << alpha << std::endl; | ||||
| 	 | ||||
| 	bf = bf - alpha * bq[0];  //bf =  A bq[0] - alpha bq[0] | ||||
|  | ||||
| 	H[0][0]=alpha; | ||||
|  | ||||
| 	std::cout << "Set H(0,0) to " << H[0][0] << std::endl; | ||||
| 	std::cout<<GridLogMessage << "Set H(0,0) to " << H[0][0] << std::endl; | ||||
|  | ||||
| 	first = 1; | ||||
|  | ||||
| @@ -716,19 +809,19 @@ until convergence | ||||
|  | ||||
| 	beta = 0;sqbt = 0; | ||||
|  | ||||
| 	std::cout << "cont is true so setting beta to zero\n"; | ||||
| 	std::cout<<GridLogMessage << "cont is true so setting beta to zero\n"; | ||||
|  | ||||
|       }	else { | ||||
|  | ||||
| 	beta = norm2(bf); | ||||
| 	sqbt = sqrt(beta); | ||||
|  | ||||
| 	std::cout << "beta = " << beta << std::endl; | ||||
| 	std::cout<<GridLogMessage << "beta = " << beta << std::endl; | ||||
|       } | ||||
|  | ||||
|       for(int j=first;j<end;j++){ | ||||
|  | ||||
| 	std::cout << "Factor j " << j <<std::endl; | ||||
| 	std::cout<<GridLogMessage << "Factor j " << j <<std::endl; | ||||
|  | ||||
| 	if(cont){ // switches to factoring; understand start!=0 and initial bf value is right. | ||||
| 	  bq[j] = bf; cont = false; | ||||
| @@ -751,7 +844,7 @@ until convergence | ||||
|  | ||||
| 	beta = fnorm; | ||||
| 	sqbt = sqrt(beta); | ||||
| 	std::cout << "alpha = " << alpha << " fnorm = " << fnorm << '\n'; | ||||
| 	std::cout<<GridLogMessage << "alpha = " << alpha << " fnorm = " << fnorm << '\n'; | ||||
|  | ||||
| 	///Iterative refinement of orthogonality V = [ bq[0]  bq[1]  ...  bq[M] ] | ||||
| 	int re = 0; | ||||
| @@ -786,8 +879,8 @@ until convergence | ||||
| 	  bck = sqrt( nmbex ); | ||||
| 	  re++; | ||||
| 	} | ||||
| 	std::cout << "Iteratively refined orthogonality, changes alpha\n"; | ||||
| 	if(re > 1) std::cout << "orthagonality refined " << re << " times" <<std::endl; | ||||
| 	std::cout<<GridLogMessage << "Iteratively refined orthogonality, changes alpha\n"; | ||||
| 	if(re > 1) std::cout<<GridLogMessage << "orthagonality refined " << re << " times" <<std::endl; | ||||
| 	H[j][j]=alpha; | ||||
|       } | ||||
|  | ||||
| @@ -802,11 +895,13 @@ until convergence | ||||
|  | ||||
|     void ImplicitRestart(int TM, DenseVector<RealD> &evals,  DenseVector<DenseVector<RealD> > &evecs, DenseVector<Field> &bq, Field &bf, int cont) | ||||
|     { | ||||
|       std::cout << "ImplicitRestart begin. Eigensort starting\n"; | ||||
|       std::cout<<GridLogMessage << "ImplicitRestart begin. Eigensort starting\n"; | ||||
|  | ||||
|       DenseMatrix<RealD> H; Resize(H,Nm,Nm); | ||||
|  | ||||
| #ifndef USE_LAPACK | ||||
|       EigenSort(evals, evecs); | ||||
| #endif | ||||
|  | ||||
|       ///Assign shifts | ||||
|       int K=Nk; | ||||
| @@ -829,15 +924,15 @@ until convergence | ||||
|       /// Shifted H defines a new K step Arnoldi factorization | ||||
|       RealD  beta = H[ff][ff-1];  | ||||
|       RealD  sig  = Q[TM - 1][ff - 1]; | ||||
|       std::cout << "beta = " << beta << " sig = " << real(sig) <<std::endl; | ||||
|       std::cout<<GridLogMessage << "beta = " << beta << " sig = " << real(sig) <<std::endl; | ||||
|  | ||||
|       std::cout << "TM = " << TM << " "; | ||||
|       std::cout << norm2(bq[0]) << " -- before" <<std::endl; | ||||
|       std::cout<<GridLogMessage << "TM = " << TM << " "; | ||||
|       std::cout<<GridLogMessage << norm2(bq[0]) << " -- before" <<std::endl; | ||||
|  | ||||
|       /// q -> q Q | ||||
|       times_real(bq, Q, TM); | ||||
|  | ||||
|       std::cout << norm2(bq[0]) << " -- after " << ff <<std::endl; | ||||
|       std::cout<<GridLogMessage << norm2(bq[0]) << " -- after " << ff <<std::endl; | ||||
|       bf =  beta* bq[ff] + sig* bf; | ||||
|  | ||||
|       /// Do the rest of the factorization | ||||
| @@ -861,7 +956,7 @@ until convergence | ||||
|       int ff = Lanczos_Factor(0, M, cont, bq,bf,H); // 0--M to begin with | ||||
|  | ||||
|       if(ff < M) { | ||||
| 	std::cout << "Krylov: aborting ff "<<ff <<" "<<M<<std::endl; | ||||
| 	std::cout<<GridLogMessage << "Krylov: aborting ff "<<ff <<" "<<M<<std::endl; | ||||
| 	abort(); // Why would this happen? | ||||
|       } | ||||
|  | ||||
| @@ -870,7 +965,7 @@ until convergence | ||||
|  | ||||
|       for(int it = 0; it < Niter && (converged < Nk); ++it) { | ||||
|  | ||||
| 	std::cout << "Krylov: Iteration --> " << it << std::endl; | ||||
| 	std::cout<<GridLogMessage << "Krylov: Iteration --> " << it << std::endl; | ||||
| 	int lock_num = lock ? converged : 0; | ||||
| 	DenseVector<RealD> tevals(M - lock_num ); | ||||
| 	DenseMatrix<RealD> tevecs; Resize(tevecs,M - lock_num,M - lock_num); | ||||
| @@ -886,7 +981,7 @@ until convergence | ||||
|       Wilkinson<RealD>(H, evals, evecs, small);  | ||||
|       //      Check(); | ||||
|  | ||||
|       std::cout << "Done  "<<std::endl; | ||||
|       std::cout<<GridLogMessage << "Done  "<<std::endl; | ||||
|  | ||||
|     } | ||||
|  | ||||
| @@ -951,7 +1046,7 @@ until convergence | ||||
| 		  DenseVector<RealD> &tevals, DenseVector<DenseVector<RealD> > &tevecs,  | ||||
| 		  int lock, int converged) | ||||
|     { | ||||
|       std::cout << "Converged " << converged << " so far." << std::endl; | ||||
|       std::cout<<GridLogMessage << "Converged " << converged << " so far." << std::endl; | ||||
|       int lock_num = lock ? converged : 0; | ||||
|       int M = Nm; | ||||
|  | ||||
| @@ -966,7 +1061,9 @@ until convergence | ||||
|       RealD small=1.0e-16; | ||||
|       Wilkinson<RealD>(AH, tevals, tevecs, small); | ||||
|  | ||||
| #ifndef USE_LAPACK | ||||
|       EigenSort(tevals, tevecs); | ||||
| #endif | ||||
|  | ||||
|       RealD resid_nrm=  norm2(bf); | ||||
|  | ||||
| @@ -977,7 +1074,7 @@ until convergence | ||||
| 	RealD diff = 0; | ||||
| 	diff = abs( tevecs[i][Nm - 1 - lock_num] ) * resid_nrm; | ||||
|  | ||||
| 	std::cout << "residual estimate " << SS-1-i << " " << diff << " of (" << tevals[i] << ")" << std::endl; | ||||
| 	std::cout<<GridLogMessage << "residual estimate " << SS-1-i << " " << diff << " of (" << tevals[i] << ")" << std::endl; | ||||
|  | ||||
| 	if(diff < converged) { | ||||
|  | ||||
| @@ -993,13 +1090,13 @@ until convergence | ||||
| 	    lock_num++; | ||||
| 	  } | ||||
| 	  converged++; | ||||
| 	  std::cout << " converged on eval " << converged << " of " << Nk << std::endl; | ||||
| 	  std::cout<<GridLogMessage << " converged on eval " << converged << " of " << Nk << std::endl; | ||||
| 	} else { | ||||
| 	  break; | ||||
| 	} | ||||
|       } | ||||
| #endif | ||||
|       std::cout << "Got " << converged << " so far " <<std::endl;	 | ||||
|       std::cout<<GridLogMessage << "Got " << converged << " so far " <<std::endl;	 | ||||
|     } | ||||
|  | ||||
|     ///Check | ||||
| @@ -1008,7 +1105,9 @@ until convergence | ||||
|  | ||||
|       DenseVector<RealD> goodval(this->get); | ||||
|  | ||||
| #ifndef USE_LAPACK | ||||
|       EigenSort(evals,evecs); | ||||
| #endif | ||||
|  | ||||
|       int NM = Nm; | ||||
|  | ||||
| @@ -1080,10 +1179,10 @@ say con = 2 | ||||
| **/ | ||||
|  | ||||
| template<class T> | ||||
| static void Lock(DenseMatrix<T> &H, 	// Hess mtx	 | ||||
| 		 DenseMatrix<T> &Q, 	// Lock Transform | ||||
| 		 T val, 		// value to be locked | ||||
| 		 int con, 	// number already locked | ||||
| static void Lock(DenseMatrix<T> &H, 	///Hess mtx	 | ||||
| 		 DenseMatrix<T> &Q, 	///Lock Transform | ||||
| 		 T val, 		///value to be locked | ||||
| 		 int con, 	///number already locked | ||||
| 		 RealD small, | ||||
| 		 int dfg, | ||||
| 		 bool herm) | ||||
|   | ||||
| @@ -36,7 +36,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #include <iomanip> | ||||
| #include <complex> | ||||
| #include <typeinfo> | ||||
| #include <Grid/Grid.h> | ||||
| #include <Grid.h> | ||||
|  | ||||
|  | ||||
| /** Sign function **/ | ||||
|   | ||||
| @@ -81,10 +81,12 @@ int main(int argc, char** argv) { | ||||
|   RealD M5 = 1.8; | ||||
|   std::vector < std::complex<double>  > omegas; | ||||
|   for(int i=0;i<Ls;i++){ | ||||
|   	std::complex<double> temp (0.25+0.00*i, 0.0+0.00*i); | ||||
|  	 omegas.push_back(temp); | ||||
| 	double imag = 0.; | ||||
| 	if (i==0) imag=1.; | ||||
| 	if (i==Ls-1) imag=-1.; | ||||
| 	std::complex<double> temp (0.25+0.01*i, imag*0.01); | ||||
| 	omegas.push_back(temp); | ||||
|   } | ||||
| //  DomainWallFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5); | ||||
|   ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, omegas,1.,0.); | ||||
|  | ||||
|   LatticeFermion src_o(FrbGrid); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user