/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h Copyright (C) 2015 Author: Chulwoo Jung This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END/ LEGAL */ #ifndef GRID_CONJUGATE_GRADIENT_MULTI_MIXED_PREC_H #define GRID_CONJUGATE_GRADIENT_MULTI_MIXED_PREC_H namespace Grid { //Mixed precision restarted defect correction CG template::value == 2, int>::type = 0 //, typename std::enable_if< getPrecision::value == 1, int>::type = 0 > class MixedPrecisionConjugateGradientMultiShift : public LinearFunction { public: // RealD Tolerance; Integer MaxInnerIterations; Integer MaxOuterIterations; GridBase* SinglePrecGrid; //Grid for single-precision fields RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance LinearOperatorBase &Linop_f; LinearOperatorBase &Linop_d; MultiShiftFunction shifts; Integer iter; //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess // LinearFunction *guesser; 
MixedPrecisionConjugateGradientMultiShift(GridBase* _sp_grid, LinearOperatorBase &_Linop_f, LinearOperatorBase &_Linop_d, Integer maxinnerit, MultiShiftFunction &_shifts ) : Linop_f(_Linop_f), Linop_d(_Linop_d), MaxInnerIterations(maxinnerit), SinglePrecGrid(_sp_grid), OuterLoopNormMult(100.), shifts(_shifts) {}; void operator() (const FieldD &src_d_in, FieldD &sol_d){ assert(0); // not yet implemented } void operator() (const FieldD &src_d_in, std::vector &sol_d){ GridStopWatch TotalTimer; TotalTimer.Start(); int cb = src_d_in.checkerboard; int nshift = shifts.order; assert(nshift == sol_d.size()); for(int i=0;i sol_f(nshift,SinglePrecGrid); for(int i=0;i CG_f(inner_tol, MaxInnerIterations); ConjugateGradientMultiShift MSCG(MaxInnerIterations,shifts); // CG_f.ErrorOnNoConverge = false; GridStopWatch InnerCGtimer; GridStopWatch PrecChangeTimer; { // std::cout< &mass(shifts.poles); // Make references to array in "shifts" std::vector &mresidual(shifts.tolerances); std::vector alpha(nshift,1.); std::vector ps(nshift,grid);// Search directions assert(sol_f.size()==nshift); assert(mass.size()==nshift); assert(mresidual.size()==nshift); // dynamic sized arrays on stack; 2d is a pain with vector RealD bs[nshift]; RealD rsq[nshift]; RealD z[nshift][2]; int converged[nshift]; const int primary =0; //Primary shift fields CG iteration RealD a,b,c,d; RealD cp,bp,qq; //prev int cb=src_f.checkerboard; // Matrix mult fields FieldF r(grid); r.checkerboard = src_f.checkerboard; FieldF p(grid); p.checkerboard = src_f.checkerboard; FieldF tmp(grid); tmp.checkerboard = src_f.checkerboard; FieldF mmp(grid);mmp.checkerboard = src_f.checkerboard; FieldF psi(grid);psi.checkerboard = src_f.checkerboard; std::cout.precision(12); std::cout< 2+Ls, so ~ 3x saving // Pipelined CG gain: // // New Kernel: Load r, vector of coeffs, vector of pointers ps // New Kernel: Load sol_f[0], vector of coeffs, vector of pointers ps // If can predict the coefficient bs then we can fuse these and avoid write 
reread cycle // on ps[s]. // Before: 3 x npole + 3 x npole // After : 2 x npole (ps[s]) => 3x speed up of multishift CG. if( (!converged[s]) ) { axpy(sol_f[ss],-bs[s]*alpha[s],ps[s],sol_f[ss]); } } if (k%MaxInnerIterations==0){ // if (c < 1e-4*c_relup){ RealD c_f=c; precisionChange(tmp_d,psi); RealD sol_norm =axpy_norm (psi_d,1.,tmp_d,psi_d); tmp1 = norm2(psi); zeroit(psi); tmp2 = norm2(psi); std::cout<